Skip to content

Instantly share code, notes, and snippets.

@antmikinka
Created May 16, 2024 08:44
Show Gist options
  • Save antmikinka/a8d47712c1503d7aa929ee06d5cf3cfd to your computer and use it in GitHub Desktop.
Save antmikinka/a8d47712c1503d7aa929ee06d5cf3cfd to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
❯ python -m examples.models.llama2.export_llama --checkpoint /Users/anthonymikinka/executorch/llama-2-7b-chat/consolidated.00.pth --params /Users/anthonymikinka/executorch/llama-2-7b-chat/params.json -kv --coreml -qmode 8da4w
Could not import fairseq2 modules.
INFO:root:Loading model with checkpoint=/Users/anthonymikinka/executorch/llama-2-7b-chat/consolidated.00.pth, params=/Users/anthonymikinka/executorch/llama-2-7b-chat/params.json, use_kv_cache=True, weight_type=WeightType.LLAMA
INFO:root:Loaded model with dtype=torch.bfloat16
INFO:datasets:PyTorch version 2.3.0 available.
linear: layers.0.attention.wq, in=4096, out=4096
linear: layers.0.attention.wk, in=4096, out=4096
linear: layers.0.attention.wv, in=4096, out=4096
linear: layers.0.attention.wo, in=4096, out=4096
linear: layers.0.feed_forward.w1, in=4096, out=11008
linear: layers.0.feed_forward.w2, in=11008, out=4096
linear: layers.0.feed_forward.w3, in=4096, out=11008
linear: layers.1.attention.wq, in=4096, out=4096
linear: layers.1.attention.wk, in=4096, out=4096
linear: layers.1.attention.wv, in=4096, out=4096
linear: layers.1.attention.wo, in=4096, out=4096
linear: layers.1.feed_forward.w1, in=4096, out=11008
linear: layers.1.feed_forward.w2, in=11008, out=4096
linear: layers.1.feed_forward.w3, in=4096, out=11008
linear: layers.2.attention.wq, in=4096, out=4096
linear: layers.2.attention.wk, in=4096, out=4096
linear: layers.2.attention.wv, in=4096, out=4096
linear: layers.2.attention.wo, in=4096, out=4096
linear: layers.2.feed_forward.w1, in=4096, out=11008
linear: layers.2.feed_forward.w2, in=11008, out=4096
linear: layers.2.feed_forward.w3, in=4096, out=11008
linear: layers.3.attention.wq, in=4096, out=4096
linear: layers.3.attention.wk, in=4096, out=4096
linear: layers.3.attention.wv, in=4096, out=4096
linear: layers.3.attention.wo, in=4096, out=4096
linear: layers.3.feed_forward.w1, in=4096, out=11008
linear: layers.3.feed_forward.w2, in=11008, out=4096
linear: layers.3.feed_forward.w3, in=4096, out=11008
linear: layers.4.attention.wq, in=4096, out=4096
linear: layers.4.attention.wk, in=4096, out=4096
linear: layers.4.attention.wv, in=4096, out=4096
linear: layers.4.attention.wo, in=4096, out=4096
linear: layers.4.feed_forward.w1, in=4096, out=11008
linear: layers.4.feed_forward.w2, in=11008, out=4096
linear: layers.4.feed_forward.w3, in=4096, out=11008
linear: layers.5.attention.wq, in=4096, out=4096
linear: layers.5.attention.wk, in=4096, out=4096
linear: layers.5.attention.wv, in=4096, out=4096
linear: layers.5.attention.wo, in=4096, out=4096
linear: layers.5.feed_forward.w1, in=4096, out=11008
linear: layers.5.feed_forward.w2, in=11008, out=4096
linear: layers.5.feed_forward.w3, in=4096, out=11008
linear: layers.6.attention.wq, in=4096, out=4096
linear: layers.6.attention.wk, in=4096, out=4096
linear: layers.6.attention.wv, in=4096, out=4096
linear: layers.6.attention.wo, in=4096, out=4096
linear: layers.6.feed_forward.w1, in=4096, out=11008
linear: layers.6.feed_forward.w2, in=11008, out=4096
linear: layers.6.feed_forward.w3, in=4096, out=11008
linear: layers.7.attention.wq, in=4096, out=4096
linear: layers.7.attention.wk, in=4096, out=4096
linear: layers.7.attention.wv, in=4096, out=4096
linear: layers.7.attention.wo, in=4096, out=4096
linear: layers.7.feed_forward.w1, in=4096, out=11008
linear: layers.7.feed_forward.w2, in=11008, out=4096
linear: layers.7.feed_forward.w3, in=4096, out=11008
linear: layers.8.attention.wq, in=4096, out=4096
linear: layers.8.attention.wk, in=4096, out=4096
linear: layers.8.attention.wv, in=4096, out=4096
linear: layers.8.attention.wo, in=4096, out=4096
linear: layers.8.feed_forward.w1, in=4096, out=11008
linear: layers.8.feed_forward.w2, in=11008, out=4096
linear: layers.8.feed_forward.w3, in=4096, out=11008
linear: layers.9.attention.wq, in=4096, out=4096
linear: layers.9.attention.wk, in=4096, out=4096
linear: layers.9.attention.wv, in=4096, out=4096
linear: layers.9.attention.wo, in=4096, out=4096
linear: layers.9.feed_forward.w1, in=4096, out=11008
linear: layers.9.feed_forward.w2, in=11008, out=4096
linear: layers.9.feed_forward.w3, in=4096, out=11008
linear: layers.10.attention.wq, in=4096, out=4096
linear: layers.10.attention.wk, in=4096, out=4096
linear: layers.10.attention.wv, in=4096, out=4096
linear: layers.10.attention.wo, in=4096, out=4096
linear: layers.10.feed_forward.w1, in=4096, out=11008
linear: layers.10.feed_forward.w2, in=11008, out=4096
linear: layers.10.feed_forward.w3, in=4096, out=11008
linear: layers.11.attention.wq, in=4096, out=4096
linear: layers.11.attention.wk, in=4096, out=4096
linear: layers.11.attention.wv, in=4096, out=4096
linear: layers.11.attention.wo, in=4096, out=4096
linear: layers.11.feed_forward.w1, in=4096, out=11008
linear: layers.11.feed_forward.w2, in=11008, out=4096
linear: layers.11.feed_forward.w3, in=4096, out=11008
linear: layers.12.attention.wq, in=4096, out=4096
linear: layers.12.attention.wk, in=4096, out=4096
linear: layers.12.attention.wv, in=4096, out=4096
linear: layers.12.attention.wo, in=4096, out=4096
linear: layers.12.feed_forward.w1, in=4096, out=11008
linear: layers.12.feed_forward.w2, in=11008, out=4096
linear: layers.12.feed_forward.w3, in=4096, out=11008
linear: layers.13.attention.wq, in=4096, out=4096
linear: layers.13.attention.wk, in=4096, out=4096
linear: layers.13.attention.wv, in=4096, out=4096
linear: layers.13.attention.wo, in=4096, out=4096
linear: layers.13.feed_forward.w1, in=4096, out=11008
linear: layers.13.feed_forward.w2, in=11008, out=4096
linear: layers.13.feed_forward.w3, in=4096, out=11008
linear: layers.14.attention.wq, in=4096, out=4096
linear: layers.14.attention.wk, in=4096, out=4096
linear: layers.14.attention.wv, in=4096, out=4096
linear: layers.14.attention.wo, in=4096, out=4096
linear: layers.14.feed_forward.w1, in=4096, out=11008
linear: layers.14.feed_forward.w2, in=11008, out=4096
linear: layers.14.feed_forward.w3, in=4096, out=11008
linear: layers.15.attention.wq, in=4096, out=4096
linear: layers.15.attention.wk, in=4096, out=4096
linear: layers.15.attention.wv, in=4096, out=4096
linear: layers.15.attention.wo, in=4096, out=4096
linear: layers.15.feed_forward.w1, in=4096, out=11008
linear: layers.15.feed_forward.w2, in=11008, out=4096
linear: layers.15.feed_forward.w3, in=4096, out=11008
linear: layers.16.attention.wq, in=4096, out=4096
linear: layers.16.attention.wk, in=4096, out=4096
linear: layers.16.attention.wv, in=4096, out=4096
linear: layers.16.attention.wo, in=4096, out=4096
linear: layers.16.feed_forward.w1, in=4096, out=11008
linear: layers.16.feed_forward.w2, in=11008, out=4096
linear: layers.16.feed_forward.w3, in=4096, out=11008
linear: layers.17.attention.wq, in=4096, out=4096
linear: layers.17.attention.wk, in=4096, out=4096
linear: layers.17.attention.wv, in=4096, out=4096
linear: layers.17.attention.wo, in=4096, out=4096
linear: layers.17.feed_forward.w1, in=4096, out=11008
linear: layers.17.feed_forward.w2, in=11008, out=4096
linear: layers.17.feed_forward.w3, in=4096, out=11008
linear: layers.18.attention.wq, in=4096, out=4096
linear: layers.18.attention.wk, in=4096, out=4096
linear: layers.18.attention.wv, in=4096, out=4096
linear: layers.18.attention.wo, in=4096, out=4096
linear: layers.18.feed_forward.w1, in=4096, out=11008
linear: layers.18.feed_forward.w2, in=11008, out=4096
linear: layers.18.feed_forward.w3, in=4096, out=11008
linear: layers.19.attention.wq, in=4096, out=4096
linear: layers.19.attention.wk, in=4096, out=4096
linear: layers.19.attention.wv, in=4096, out=4096
linear: layers.19.attention.wo, in=4096, out=4096
linear: layers.19.feed_forward.w1, in=4096, out=11008
linear: layers.19.feed_forward.w2, in=11008, out=4096
linear: layers.19.feed_forward.w3, in=4096, out=11008
linear: layers.20.attention.wq, in=4096, out=4096
linear: layers.20.attention.wk, in=4096, out=4096
linear: layers.20.attention.wv, in=4096, out=4096
linear: layers.20.attention.wo, in=4096, out=4096
linear: layers.20.feed_forward.w1, in=4096, out=11008
linear: layers.20.feed_forward.w2, in=11008, out=4096
linear: layers.20.feed_forward.w3, in=4096, out=11008
linear: layers.21.attention.wq, in=4096, out=4096
linear: layers.21.attention.wk, in=4096, out=4096
linear: layers.21.attention.wv, in=4096, out=4096
linear: layers.21.attention.wo, in=4096, out=4096
linear: layers.21.feed_forward.w1, in=4096, out=11008
linear: layers.21.feed_forward.w2, in=11008, out=4096
linear: layers.21.feed_forward.w3, in=4096, out=11008
linear: layers.22.attention.wq, in=4096, out=4096
linear: layers.22.attention.wk, in=4096, out=4096
linear: layers.22.attention.wv, in=4096, out=4096
linear: layers.22.attention.wo, in=4096, out=4096
linear: layers.22.feed_forward.w1, in=4096, out=11008
linear: layers.22.feed_forward.w2, in=11008, out=4096
linear: layers.22.feed_forward.w3, in=4096, out=11008
linear: layers.23.attention.wq, in=4096, out=4096
linear: layers.23.attention.wk, in=4096, out=4096
linear: layers.23.attention.wv, in=4096, out=4096
linear: layers.23.attention.wo, in=4096, out=4096
linear: layers.23.feed_forward.w1, in=4096, out=11008
linear: layers.23.feed_forward.w2, in=11008, out=4096
linear: layers.23.feed_forward.w3, in=4096, out=11008
linear: layers.24.attention.wq, in=4096, out=4096
linear: layers.24.attention.wk, in=4096, out=4096
linear: layers.24.attention.wv, in=4096, out=4096
linear: layers.24.attention.wo, in=4096, out=4096
linear: layers.24.feed_forward.w1, in=4096, out=11008
linear: layers.24.feed_forward.w2, in=11008, out=4096
linear: layers.24.feed_forward.w3, in=4096, out=11008
linear: layers.25.attention.wq, in=4096, out=4096
linear: layers.25.attention.wk, in=4096, out=4096
linear: layers.25.attention.wv, in=4096, out=4096
linear: layers.25.attention.wo, in=4096, out=4096
linear: layers.25.feed_forward.w1, in=4096, out=11008
linear: layers.25.feed_forward.w2, in=11008, out=4096
linear: layers.25.feed_forward.w3, in=4096, out=11008
linear: layers.26.attention.wq, in=4096, out=4096
linear: layers.26.attention.wk, in=4096, out=4096
linear: layers.26.attention.wv, in=4096, out=4096
linear: layers.26.attention.wo, in=4096, out=4096
linear: layers.26.feed_forward.w1, in=4096, out=11008
linear: layers.26.feed_forward.w2, in=11008, out=4096
linear: layers.26.feed_forward.w3, in=4096, out=11008
linear: layers.27.attention.wq, in=4096, out=4096
linear: layers.27.attention.wk, in=4096, out=4096
linear: layers.27.attention.wv, in=4096, out=4096
linear: layers.27.attention.wo, in=4096, out=4096
linear: layers.27.feed_forward.w1, in=4096, out=11008
linear: layers.27.feed_forward.w2, in=11008, out=4096
linear: layers.27.feed_forward.w3, in=4096, out=11008
linear: layers.28.attention.wq, in=4096, out=4096
linear: layers.28.attention.wk, in=4096, out=4096
linear: layers.28.attention.wv, in=4096, out=4096
linear: layers.28.attention.wo, in=4096, out=4096
linear: layers.28.feed_forward.w1, in=4096, out=11008
linear: layers.28.feed_forward.w2, in=11008, out=4096
linear: layers.28.feed_forward.w3, in=4096, out=11008
linear: layers.29.attention.wq, in=4096, out=4096
linear: layers.29.attention.wk, in=4096, out=4096
linear: layers.29.attention.wv, in=4096, out=4096
linear: layers.29.attention.wo, in=4096, out=4096
linear: layers.29.feed_forward.w1, in=4096, out=11008
linear: layers.29.feed_forward.w2, in=11008, out=4096
linear: layers.29.feed_forward.w3, in=4096, out=11008
linear: layers.30.attention.wq, in=4096, out=4096
linear: layers.30.attention.wk, in=4096, out=4096
linear: layers.30.attention.wv, in=4096, out=4096
linear: layers.30.attention.wo, in=4096, out=4096
linear: layers.30.feed_forward.w1, in=4096, out=11008
linear: layers.30.feed_forward.w2, in=11008, out=4096
linear: layers.30.feed_forward.w3, in=4096, out=11008
linear: layers.31.attention.wq, in=4096, out=4096
linear: layers.31.attention.wk, in=4096, out=4096
linear: layers.31.attention.wv, in=4096, out=4096
linear: layers.31.attention.wo, in=4096, out=4096
linear: layers.31.feed_forward.w1, in=4096, out=11008
linear: layers.31.feed_forward.w2, in=11008, out=4096
linear: layers.31.feed_forward.w3, in=4096, out=11008
linear: output, in=4096, out=32000
INFO:root:model.to torch.float32
INFO:root:Core ATen graph:
graph():
%arg0_1 : [num_users=1] = placeholder[target=arg0_1]
%arg1_1 : [num_users=1] = placeholder[target=arg1_1]
%arg2_1 : [num_users=1] = placeholder[target=arg2_1]
%arg3_1 : [num_users=1] = placeholder[target=arg3_1]
%arg4_1 : [num_users=1] = placeholder[target=arg4_1]
%arg5_1 : [num_users=1] = placeholder[target=arg5_1]
%arg6_1 : [num_users=1] = placeholder[target=arg6_1]
%arg7_1 : [num_users=1] = placeholder[target=arg7_1]
%arg8_1 : [num_users=1] = placeholder[target=arg8_1]
%arg9_1 : [num_users=1] = placeholder[target=arg9_1]
%arg10_1 : [num_users=1] = placeholder[target=arg10_1]
%arg11_1 : [num_users=1] = placeholder[target=arg11_1]
%arg12_1 : [num_users=1] = placeholder[target=arg12_1]
%arg13_1 : [num_users=1] = placeholder[target=arg13_1]
%arg14_1 : [num_users=1] = placeholder[target=arg14_1]
%arg15_1 : [num_users=1] = placeholder[target=arg15_1]
%arg16_1 : [num_users=1] = placeholder[target=arg16_1]
%arg17_1 : [num_users=1] = placeholder[target=arg17_1]
%arg18_1 : [num_users=1] = placeholder[target=arg18_1]
%arg19_1 : [num_users=1] = placeholder[target=arg19_1]
%arg20_1 : [num_users=1] = placeholder[target=arg20_1]
%arg21_1 : [num_users=1] = placeholder[target=arg21_1]
%arg22_1 : [num_users=1] = placeholder[target=arg22_1]
%arg23_1 : [num_users=1] = placeholder[target=arg23_1]
%arg24_1 : [num_users=1] = placeholder[target=arg24_1]
%arg25_1 : [num_users=1] = placeholder[target=arg25_1]
%arg26_1 : [num_users=1] = placeholder[target=arg26_1]
%arg27_1 : [num_users=1] = placeholder[target=arg27_1]
%arg28_1 : [num_users=1] = placeholder[target=arg28_1]
%arg29_1 : [num_users=1] = placeholder[target=arg29_1]
%arg30_1 : [num_users=1] = placeholder[target=arg30_1]
%arg31_1 : [num_users=1] = placeholder[target=arg31_1]
%arg32_1 : [num_users=1] = placeholder[target=arg32_1]
%arg33_1 : [num_users=1] = placeholder[target=arg33_1]
%arg34_1 : [num_users=1] = placeholder[target=arg34_1]
%arg35_1 : [num_users=1] = placeholder[target=arg35_1]
%arg36_1 : [num_users=1] = placeholder[target=arg36_1]
%arg37_1 : [num_users=1] = placeholder[target=arg37_1]
%arg38_1 : [num_users=1] = placeholder[target=arg38_1]
%arg39_1 : [num_users=1] = placeholder[target=arg39_1]
%arg40_1 : [num_users=1] = placeholder[target=arg40_1]
%arg41_1 : [num_users=1] = placeholder[target=arg41_1]
%arg42_1 : [num_users=1] = placeholder[target=arg42_1]
%arg43_1 : [num_users=1] = placeholder[target=arg43_1]
%arg44_1 : [num_users=1] = placeholder[target=arg44_1]
%arg45_1 : [num_users=1] = placeholder[target=arg45_1]
%arg46_1 : [num_users=1] = placeholder[target=arg46_1]
%arg47_1 : [num_users=1] = placeholder[target=arg47_1]
%arg48_1 : [num_users=1] = placeholder[target=arg48_1]
%arg49_1 : [num_users=1] = placeholder[target=arg49_1]
%arg50_1 : [num_users=1] = placeholder[target=arg50_1]
%arg51_1 : [num_users=1] = placeholder[target=arg51_1]
%arg52_1 : [num_users=1] = placeholder[target=arg52_1]
%arg53_1 : [num_users=1] = placeholder[target=arg53_1]
%arg54_1 : [num_users=1] = placeholder[target=arg54_1]
%arg55_1 : [num_users=1] = placeholder[target=arg55_1]
%arg56_1 : [num_users=1] = placeholder[target=arg56_1]
%arg57_1 : [num_users=1] = placeholder[target=arg57_1]
%arg58_1 : [num_users=1] = placeholder[target=arg58_1]
%arg59_1 : [num_users=1] = placeholder[target=arg59_1]
%arg60_1 : [num_users=1] = placeholder[target=arg60_1]
%arg61_1 : [num_users=1] = placeholder[target=arg61_1]
%arg62_1 : [num_users=1] = placeholder[target=arg62_1]
%arg63_1 : [num_users=1] = placeholder[target=arg63_1]
%arg64_1 : [num_users=1] = placeholder[target=arg64_1]
%arg65_1 : [num_users=1] = placeholder[target=arg65_1]
%arg66_1 : [num_users=1] = placeholder[target=arg66_1]
%arg67_1 : [num_users=1] = placeholder[target=arg67_1]
%arg68_1 : [num_users=1] = placeholder[target=arg68_1]
%arg69_1 : [num_users=1] = placeholder[target=arg69_1]
%arg70_1 : [num_users=1] = placeholder[target=arg70_1]
%arg71_1 : [num_users=1] = placeholder[target=arg71_1]
%arg72_1 : [num_users=1] = placeholder[target=arg72_1]
%arg73_1 : [num_users=1] = placeholder[target=arg73_1]
%arg74_1 : [num_users=1] = placeholder[target=arg74_1]
%arg75_1 : [num_users=1] = placeholder[target=arg75_1]
%arg76_1 : [num_users=1] = placeholder[target=arg76_1]
%arg77_1 : [num_users=3] = placeholder[target=arg77_1]
%arg78_1 : [num_users=3] = placeholder[target=arg78_1]
%arg79_1 : [num_users=1] = placeholder[target=arg79_1]
%arg80_1 : [num_users=1] = placeholder[target=arg80_1]
%arg81_1 : [num_users=1] = placeholder[target=arg81_1]
%arg82_1 : [num_users=1] = placeholder[target=arg82_1]
%arg83_1 : [num_users=1] = placeholder[target=arg83_1]
%arg84_1 : [num_users=1] = placeholder[target=arg84_1]
%arg85_1 : [num_users=1] = placeholder[target=arg85_1]
%arg86_1 : [num_users=1] = placeholder[target=arg86_1]
%arg87_1 : [num_users=1] = placeholder[target=arg87_1]
%arg88_1 : [num_users=1] = placeholder[target=arg88_1]
%arg89_1 : [num_users=1] = placeholder[target=arg89_1]
%arg90_1 : [num_users=1] = placeholder[target=arg90_1]
%arg91_1 : [num_users=1] = placeholder[target=arg91_1]
%arg92_1 : [num_users=1] = placeholder[target=arg92_1]
%arg93_1 : [num_users=1] = placeholder[target=arg93_1]
%arg94_1 : [num_users=1] = placeholder[target=arg94_1]
%arg95_1 : [num_users=1] = placeholder[target=arg95_1]
%arg96_1 : [num_users=1] = placeholder[target=arg96_1]
%arg97_1 : [num_users=1] = placeholder[target=arg97_1]
%arg98_1 : [num_users=1] = placeholder[target=arg98_1]
%arg99_1 : [num_users=1] = placeholder[target=arg99_1]
%arg100_1 : [num_users=1] = placeholder[target=arg100_1]
%arg101_1 : [num_users=3] = placeholder[target=arg101_1]
%arg102_1 : [num_users=3] = placeholder[target=arg102_1]
%arg103_1 : [num_users=1] = placeholder[target=arg103_1]
%arg104_1 : [num_users=1] = placeholder[target=arg104_1]
%arg105_1 : [num_users=1] = placeholder[target=arg105_1]
%arg106_1 : [num_users=1] = placeholder[target=arg106_1]
%arg107_1 : [num_users=1] = placeholder[target=arg107_1]
%arg108_1 : [num_users=1] = placeholder[target=arg108_1]
%arg109_1 : [num_users=1] = placeholder[target=arg109_1]
%arg110_1 : [num_users=1] = placeholder[target=arg110_1]
%arg111_1 : [num_users=1] = placeholder[target=arg111_1]
%arg112_1 : [num_users=1] = placeholder[target=arg112_1]
%arg113_1 : [num_users=1] = placeholder[target=arg113_1]
%arg114_1 : [num_users=1] = placeholder[target=arg114_1]
%arg115_1 : [num_users=1] = placeholder[target=arg115_1]
%arg116_1 : [num_users=1] = placeholder[target=arg116_1]
%arg117_1 : [num_users=1] = placeholder[target=arg117_1]
%arg118_1 : [num_users=1] = placeholder[target=arg118_1]
%arg119_1 : [num_users=1] = placeholder[target=arg119_1]
%arg120_1 : [num_users=1] = placeholder[target=arg120_1]
%arg121_1 : [num_users=1] = placeholder[target=arg121_1]
%arg122_1 : [num_users=1] = placeholder[target=arg122_1]
%arg123_1 : [num_users=1] = placeholder[target=arg123_1]
%arg124_1 : [num_users=1] = placeholder[target=arg124_1]
%arg125_1 : [num_users=3] = placeholder[target=arg125_1]
%arg126_1 : [num_users=3] = placeholder[target=arg126_1]
%arg127_1 : [num_users=1] = placeholder[target=arg127_1]
%arg128_1 : [num_users=1] = placeholder[target=arg128_1]
%arg129_1 : [num_users=1] = placeholder[target=arg129_1]
%arg130_1 : [num_users=1] = placeholder[target=arg130_1]
%arg131_1 : [num_users=1] = placeholder[target=arg131_1]
%arg132_1 : [num_users=1] = placeholder[target=arg132_1]
%arg133_1 : [num_users=1] = placeholder[target=arg133_1]
%arg134_1 : [num_users=1] = placeholder[target=arg134_1]
%arg135_1 : [num_users=1] = placeholder[target=arg135_1]
%arg136_1 : [num_users=1] = placeholder[target=arg136_1]
%arg137_1 : [num_users=1] = placeholder[target=arg137_1]
%arg138_1 : [num_users=1] = placeholder[target=arg138_1]
%arg139_1 : [num_users=1] = placeholder[target=arg139_1]
%arg140_1 : [num_users=1] = placeholder[target=arg140_1]
%arg141_1 : [num_users=1] = placeholder[target=arg141_1]
%arg142_1 : [num_users=1] = placeholder[target=arg142_1]
%arg143_1 : [num_users=1] = placeholder[target=arg143_1]
%arg144_1 : [num_users=1] = placeholder[target=arg144_1]
%arg145_1 : [num_users=1] = placeholder[target=arg145_1]
%arg146_1 : [num_users=1] = placeholder[target=arg146_1]
%arg147_1 : [num_users=1] = placeholder[target=arg147_1]
%arg148_1 : [num_users=1] = placeholder[target=arg148_1]
%arg149_1 : [num_users=3] = placeholder[target=arg149_1]
%arg150_1 : [num_users=3] = placeholder[target=arg150_1]
%arg151_1 : [num_users=1] = placeholder[target=arg151_1]
%arg152_1 : [num_users=1] = placeholder[target=arg152_1]
%arg153_1 : [num_users=1] = placeholder[target=arg153_1]
%arg154_1 : [num_users=1] = placeholder[target=arg154_1]
%arg155_1 : [num_users=1] = placeholder[target=arg155_1]
%arg156_1 : [num_users=1] = placeholder[target=arg156_1]
%arg157_1 : [num_users=1] = placeholder[target=arg157_1]
%arg158_1 : [num_users=1] = placeholder[target=arg158_1]
%arg159_1 : [num_users=1] = placeholder[target=arg159_1]
%arg160_1 : [num_users=1] = placeholder[target=arg160_1]
%arg161_1 : [num_users=1] = placeholder[target=arg161_1]
%arg162_1 : [num_users=1] = placeholder[target=arg162_1]
%arg163_1 : [num_users=1] = placeholder[target=arg163_1]
%arg164_1 : [num_users=1] = placeholder[target=arg164_1]
%arg165_1 : [num_users=1] = placeholder[target=arg165_1]
%arg166_1 : [num_users=1] = placeholder[target=arg166_1]
%arg167_1 : [num_users=1] = placeholder[target=arg167_1]
%arg168_1 : [num_users=1] = placeholder[target=arg168_1]
%arg169_1 : [num_users=1] = placeholder[target=arg169_1]
%arg170_1 : [num_users=1] = placeholder[target=arg170_1]
%arg171_1 : [num_users=1] = placeholder[target=arg171_1]
%arg172_1 : [num_users=1] = placeholder[target=arg172_1]
%arg173_1 : [num_users=3] = placeholder[target=arg173_1]
%arg174_1 : [num_users=3] = placeholder[target=arg174_1]
%arg175_1 : [num_users=1] = placeholder[target=arg175_1]
%arg176_1 : [num_users=1] = placeholder[target=arg176_1]
%arg177_1 : [num_users=1] = placeholder[target=arg177_1]
%arg178_1 : [num_users=1] = placeholder[target=arg178_1]
%arg179_1 : [num_users=1] = placeholder[target=arg179_1]
%arg180_1 : [num_users=1] = placeholder[target=arg180_1]
%arg181_1 : [num_users=1] = placeholder[target=arg181_1]
%arg182_1 : [num_users=1] = placeholder[target=arg182_1]
%arg183_1 : [num_users=1] = placeholder[target=arg183_1]
%arg184_1 : [num_users=1] = placeholder[target=arg184_1]
%arg185_1 : [num_users=1] = placeholder[target=arg185_1]
%arg186_1 : [num_users=1] = placeholder[target=arg186_1]
%arg187_1 : [num_users=1] = placeholder[target=arg187_1]
%arg188_1 : [num_users=1] = placeholder[target=arg188_1]
%arg189_1 : [num_users=1] = placeholder[target=arg189_1]
%arg190_1 : [num_users=1] = placeholder[target=arg190_1]
%arg191_1 : [num_users=1] = placeholder[target=arg191_1]
%arg192_1 : [num_users=1] = placeholder[target=arg192_1]
%arg193_1 : [num_users=1] = placeholder[target=arg193_1]
%arg194_1 : [num_users=1] = placeholder[target=arg194_1]
%arg195_1 : [num_users=1] = placeholder[target=arg195_1]
%arg196_1 : [num_users=1] = placeholder[target=arg196_1]
%arg197_1 : [num_users=3] = placeholder[target=arg197_1]
%arg198_1 : [num_users=3] = placeholder[target=arg198_1]
%arg199_1 : [num_users=1] = placeholder[target=arg199_1]
%arg200_1 : [num_users=1] = placeholder[target=arg200_1]
%arg201_1 : [num_users=1] = placeholder[target=arg201_1]
%arg202_1 : [num_users=1] = placeholder[target=arg202_1]
%arg203_1 : [num_users=1] = placeholder[target=arg203_1]
%arg204_1 : [num_users=1] = placeholder[target=arg204_1]
%arg205_1 : [num_users=1] = placeholder[target=arg205_1]
%arg206_1 : [num_users=1] = placeholder[target=arg206_1]
%arg207_1 : [num_users=1] = placeholder[target=arg207_1]
%arg208_1 : [num_users=1] = placeholder[target=arg208_1]
%arg209_1 : [num_users=1] = placeholder[target=arg209_1]
%arg210_1 : [num_users=1] = placeholder[target=arg210_1]
%arg211_1 : [num_users=1] = placeholder[target=arg211_1]
%arg212_1 : [num_users=1] = placeholder[target=arg212_1]
%arg213_1 : [num_users=1] = placeholder[target=arg213_1]
%arg214_1 : [num_users=1] = placeholder[target=arg214_1]
%arg215_1 : [num_users=1] = placeholder[target=arg215_1]
%arg216_1 : [num_users=1] = placeholder[target=arg216_1]
%arg217_1 : [num_users=1] = placeholder[target=arg217_1]
%arg218_1 : [num_users=1] = placeholder[target=arg218_1]
%arg219_1 : [num_users=1] = placeholder[target=arg219_1]
%arg220_1 : [num_users=1] = placeholder[target=arg220_1]
%arg221_1 : [num_users=3] = placeholder[target=arg221_1]
%arg222_1 : [num_users=3] = placeholder[target=arg222_1]
%arg223_1 : [num_users=1] = placeholder[target=arg223_1]
%arg224_1 : [num_users=1] = placeholder[target=arg224_1]
%arg225_1 : [num_users=1] = placeholder[target=arg225_1]
%arg226_1 : [num_users=1] = placeholder[target=arg226_1]
%arg227_1 : [num_users=1] = placeholder[target=arg227_1]
%arg228_1 : [num_users=1] = placeholder[target=arg228_1]
%arg229_1 : [num_users=1] = placeholder[target=arg229_1]
%arg230_1 : [num_users=1] = placeholder[target=arg230_1]
%arg231_1 : [num_users=1] = placeholder[target=arg231_1]
%arg232_1 : [num_users=1] = placeholder[target=arg232_1]
%arg233_1 : [num_users=1] = placeholder[target=arg233_1]
%arg234_1 : [num_users=1] = placeholder[target=arg234_1]
%arg235_1 : [num_users=1] = placeholder[target=arg235_1]
%arg236_1 : [num_users=1] = placeholder[target=arg236_1]
%arg237_1 : [num_users=1] = placeholder[target=arg237_1]
%arg238_1 : [num_users=1] = placeholder[target=arg238_1]
%arg239_1 : [num_users=1] = placeholder[target=arg239_1]
%arg240_1 : [num_users=1] = placeholder[target=arg240_1]
%arg241_1 : [num_users=1] = placeholder[target=arg241_1]
%arg242_1 : [num_users=1] = placeholder[target=arg242_1]
%arg243_1 : [num_users=1] = placeholder[target=arg243_1]
%arg244_1 : [num_users=1] = placeholder[target=arg244_1]
%arg245_1 : [num_users=3] = placeholder[target=arg245_1]
%arg246_1 : [num_users=3] = placeholder[target=arg246_1]
%arg247_1 : [num_users=1] = placeholder[target=arg247_1]
%arg248_1 : [num_users=1] = placeholder[target=arg248_1]
%arg249_1 : [num_users=1] = placeholder[target=arg249_1]
%arg250_1 : [num_users=1] = placeholder[target=arg250_1]
%arg251_1 : [num_users=1] = placeholder[target=arg251_1]
%arg252_1 : [num_users=1] = placeholder[target=arg252_1]
%arg253_1 : [num_users=1] = placeholder[target=arg253_1]
%arg254_1 : [num_users=1] = placeholder[target=arg254_1]
%arg255_1 : [num_users=1] = placeholder[target=arg255_1]
%arg256_1 : [num_users=1] = placeholder[target=arg256_1]
%arg257_1 : [num_users=1] = placeholder[target=arg257_1]
%arg258_1 : [num_users=1] = placeholder[target=arg258_1]
%arg259_1 : [num_users=1] = placeholder[target=arg259_1]
%arg260_1 : [num_users=1] = placeholder[target=arg260_1]
%arg261_1 : [num_users=1] = placeholder[target=arg261_1]
%arg262_1 : [num_users=1] = placeholder[target=arg262_1]
%arg263_1 : [num_users=1] = placeholder[target=arg263_1]
%arg264_1 : [num_users=1] = placeholder[target=arg264_1]
%arg265_1 : [num_users=1] = placeholder[target=arg265_1]
%arg266_1 : [num_users=1] = placeholder[target=arg266_1]
%arg267_1 : [num_users=1] = placeholder[target=arg267_1]
%arg268_1 : [num_users=1] = placeholder[target=arg268_1]
%arg269_1 : [num_users=3] = placeholder[target=arg269_1]
%arg270_1 : [num_users=3] = placeholder[target=arg270_1]
%arg271_1 : [num_users=1] = placeholder[target=arg271_1]
%arg272_1 : [num_users=1] = placeholder[target=arg272_1]
%arg273_1 : [num_users=1] = placeholder[target=arg273_1]
%arg274_1 : [num_users=1] = placeholder[target=arg274_1]
%arg275_1 : [num_users=1] = placeholder[target=arg275_1]
%arg276_1 : [num_users=1] = placeholder[target=arg276_1]
%arg277_1 : [num_users=1] = placeholder[target=arg277_1]
%arg278_1 : [num_users=1] = placeholder[target=arg278_1]
%arg279_1 : [num_users=1] = placeholder[target=arg279_1]
%arg280_1 : [num_users=1] = placeholder[target=arg280_1]
%arg281_1 : [num_users=1] = placeholder[target=arg281_1]
%arg282_1 : [num_users=1] = placeholder[target=arg282_1]
%arg283_1 : [num_users=1] = placeholder[target=arg283_1]
%arg284_1 : [num_users=1] = placeholder[target=arg284_1]
%arg285_1 : [num_users=1] = placeholder[target=arg285_1]
%arg286_1 : [num_users=1] = placeholder[target=arg286_1]
%arg287_1 : [num_users=1] = placeholder[target=arg287_1]
%arg288_1 : [num_users=1] = placeholder[target=arg288_1]
%arg289_1 : [num_users=1] = placeholder[target=arg289_1]
%arg290_1 : [num_users=1] = placeholder[target=arg290_1]
%arg291_1 : [num_users=1] = placeholder[target=arg291_1]
%arg292_1 : [num_users=1] = placeholder[target=arg292_1]
%arg293_1 : [num_users=3] = placeholder[target=arg293_1]
%arg294_1 : [num_users=3] = placeholder[target=arg294_1]
%arg295_1 : [num_users=1] = placeholder[target=arg295_1]
%arg296_1 : [num_users=1] = placeholder[target=arg296_1]
%arg297_1 : [num_users=1] = placeholder[target=arg297_1]
%arg298_1 : [num_users=1] = placeholder[target=arg298_1]
%arg299_1 : [num_users=1] = placeholder[target=arg299_1]
%arg300_1 : [num_users=1] = placeholder[target=arg300_1]
%arg301_1 : [num_users=1] = placeholder[target=arg301_1]
%arg302_1 : [num_users=1] = placeholder[target=arg302_1]
%arg303_1 : [num_users=1] = placeholder[target=arg303_1]
%arg304_1 : [num_users=1] = placeholder[target=arg304_1]
%arg305_1 : [num_users=1] = placeholder[target=arg305_1]
%arg306_1 : [num_users=1] = placeholder[target=arg306_1]
%arg307_1 : [num_users=1] = placeholder[target=arg307_1]
%arg308_1 : [num_users=1] = placeholder[target=arg308_1]
%arg309_1 : [num_users=1] = placeholder[target=arg309_1]
%arg310_1 : [num_users=1] = placeholder[target=arg310_1]
%arg311_1 : [num_users=1] = placeholder[target=arg311_1]
%arg312_1 : [num_users=1] = placeholder[target=arg312_1]
%arg313_1 : [num_users=1] = placeholder[target=arg313_1]
%arg314_1 : [num_users=1] = placeholder[target=arg314_1]
%arg315_1 : [num_users=1] = placeholder[target=arg315_1]
%arg316_1 : [num_users=1] = placeholder[target=arg316_1]
%arg317_1 : [num_users=3] = placeholder[target=arg317_1]
%arg318_1 : [num_users=3] = placeholder[target=arg318_1]
%arg319_1 : [num_users=1] = placeholder[target=arg319_1]
%arg320_1 : [num_users=1] = placeholder[target=arg320_1]
%arg321_1 : [num_users=1] = placeholder[target=arg321_1]
%arg322_1 : [num_users=1] = placeholder[target=arg322_1]
%arg323_1 : [num_users=1] = placeholder[target=arg323_1]
%arg324_1 : [num_users=1] = placeholder[target=arg324_1]
%arg325_1 : [num_users=1] = placeholder[target=arg325_1]
%arg326_1 : [num_users=1] = placeholder[target=arg326_1]
%arg327_1 : [num_users=1] = placeholder[target=arg327_1]
%arg328_1 : [num_users=1] = placeholder[target=arg328_1]
%arg329_1 : [num_users=1] = placeholder[target=arg329_1]
%arg330_1 : [num_users=1] = placeholder[target=arg330_1]
%arg331_1 : [num_users=1] = placeholder[target=arg331_1]
%arg332_1 : [num_users=1] = placeholder[target=arg332_1]
%arg333_1 : [num_users=1] = placeholder[target=arg333_1]
%arg334_1 : [num_users=1] = placeholder[target=arg334_1]
%arg335_1 : [num_users=1] = placeholder[target=arg335_1]
%arg336_1 : [num_users=1] = placeholder[target=arg336_1]
%arg337_1 : [num_users=1] = placeholder[target=arg337_1]
%arg338_1 : [num_users=1] = placeholder[target=arg338_1]
%arg339_1 : [num_users=1] = placeholder[target=arg339_1]
%arg340_1 : [num_users=1] = placeholder[target=arg340_1]
%arg341_1 : [num_users=3] = placeholder[target=arg341_1]
%arg342_1 : [num_users=3] = placeholder[target=arg342_1]
%arg343_1 : [num_users=1] = placeholder[target=arg343_1]
%arg344_1 : [num_users=1] = placeholder[target=arg344_1]
%arg345_1 : [num_users=1] = placeholder[target=arg345_1]
%arg346_1 : [num_users=1] = placeholder[target=arg346_1]
%arg347_1 : [num_users=1] = placeholder[target=arg347_1]
%arg348_1 : [num_users=1] = placeholder[target=arg348_1]
%arg349_1 : [num_users=1] = placeholder[target=arg349_1]
%arg350_1 : [num_users=1] = placeholder[target=arg350_1]
%arg351_1 : [num_users=1] = placeholder[target=arg351_1]
%arg352_1 : [num_users=1] = placeholder[target=arg352_1]
%arg353_1 : [num_users=1] = placeholder[target=arg353_1]
%arg354_1 : [num_users=1] = placeholder[target=arg354_1]
%arg355_1 : [num_users=1] = placeholder[target=arg355_1]
%arg356_1 : [num_users=1] = placeholder[target=arg356_1]
%arg357_1 : [num_users=1] = placeholder[target=arg357_1]
%arg358_1 : [num_users=1] = placeholder[target=arg358_1]
%arg359_1 : [num_users=1] = placeholder[target=arg359_1]
%arg360_1 : [num_users=1] = placeholder[target=arg360_1]
%arg361_1 : [num_users=1] = placeholder[target=arg361_1]
%arg362_1 : [num_users=1] = placeholder[target=arg362_1]
%arg363_1 : [num_users=1] = placeholder[target=arg363_1]
%arg364_1 : [num_users=1] = placeholder[target=arg364_1]
%arg365_1 : [num_users=3] = placeholder[target=arg365_1]
%arg366_1 : [num_users=3] = placeholder[target=arg366_1]
%arg367_1 : [num_users=1] = placeholder[target=arg367_1]
%arg368_1 : [num_users=1] = placeholder[target=arg368_1]
%arg369_1 : [num_users=1] = placeholder[target=arg369_1]
%arg370_1 : [num_users=1] = placeholder[target=arg370_1]
%arg371_1 : [num_users=1] = placeholder[target=arg371_1]
%arg372_1 : [num_users=1] = placeholder[target=arg372_1]
%arg373_1 : [num_users=1] = placeholder[target=arg373_1]
%arg374_1 : [num_users=1] = placeholder[target=arg374_1]
%arg375_1 : [num_users=1] = placeholder[target=arg375_1]
%arg376_1 : [num_users=1] = placeholder[target=arg376_1]
%arg377_1 : [num_users=1] = placeholder[target=arg377_1]
%arg378_1 : [num_users=1] = placeholder[target=arg378_1]
%arg379_1 : [num_users=1] = placeholder[target=arg379_1]
%arg380_1 : [num_users=1] = placeholder[target=arg380_1]
%arg381_1 : [num_users=1] = placeholder[target=arg381_1]
%arg382_1 : [num_users=1] = placeholder[target=arg382_1]
%arg383_1 : [num_users=1] = placeholder[target=arg383_1]
%arg384_1 : [num_users=1] = placeholder[target=arg384_1]
%arg385_1 : [num_users=1] = placeholder[target=arg385_1]
%arg386_1 : [num_users=1] = placeholder[target=arg386_1]
%arg387_1 : [num_users=1] = placeholder[target=arg387_1]
%arg388_1 : [num_users=1] = placeholder[target=arg388_1]
%arg389_1 : [num_users=3] = placeholder[target=arg389_1]
%arg390_1 : [num_users=3] = placeholder[target=arg390_1]
%arg391_1 : [num_users=1] = placeholder[target=arg391_1]
%arg392_1 : [num_users=1] = placeholder[target=arg392_1]
%arg393_1 : [num_users=1] = placeholder[target=arg393_1]
%arg394_1 : [num_users=1] = placeholder[target=arg394_1]
%arg395_1 : [num_users=1] = placeholder[target=arg395_1]
%arg396_1 : [num_users=1] = placeholder[target=arg396_1]
%arg397_1 : [num_users=1] = placeholder[target=arg397_1]
%arg398_1 : [num_users=1] = placeholder[target=arg398_1]
%arg399_1 : [num_users=1] = placeholder[target=arg399_1]
%arg400_1 : [num_users=1] = placeholder[target=arg400_1]
%arg401_1 : [num_users=1] = placeholder[target=arg401_1]
%arg402_1 : [num_users=1] = placeholder[target=arg402_1]
%arg403_1 : [num_users=1] = placeholder[target=arg403_1]
%arg404_1 : [num_users=1] = placeholder[target=arg404_1]
%arg405_1 : [num_users=1] = placeholder[target=arg405_1]
%arg406_1 : [num_users=1] = placeholder[target=arg406_1]
%arg407_1 : [num_users=1] = placeholder[target=arg407_1]
%arg408_1 : [num_users=1] = placeholder[target=arg408_1]
%arg409_1 : [num_users=1] = placeholder[target=arg409_1]
%arg410_1 : [num_users=1] = placeholder[target=arg410_1]
%arg411_1 : [num_users=1] = placeholder[target=arg411_1]
%arg412_1 : [num_users=1] = placeholder[target=arg412_1]
%arg413_1 : [num_users=3] = placeholder[target=arg413_1]
%arg414_1 : [num_users=3] = placeholder[target=arg414_1]
%arg415_1 : [num_users=1] = placeholder[target=arg415_1]
%arg416_1 : [num_users=1] = placeholder[target=arg416_1]
%arg417_1 : [num_users=1] = placeholder[target=arg417_1]
%arg418_1 : [num_users=1] = placeholder[target=arg418_1]
%arg419_1 : [num_users=1] = placeholder[target=arg419_1]
%arg420_1 : [num_users=1] = placeholder[target=arg420_1]
%arg421_1 : [num_users=1] = placeholder[target=arg421_1]
%arg422_1 : [num_users=1] = placeholder[target=arg422_1]
%arg423_1 : [num_users=1] = placeholder[target=arg423_1]
%arg424_1 : [num_users=1] = placeholder[target=arg424_1]
%arg425_1 : [num_users=1] = placeholder[target=arg425_1]
%arg426_1 : [num_users=1] = placeholder[target=arg426_1]
%arg427_1 : [num_users=1] = placeholder[target=arg427_1]
%arg428_1 : [num_users=1] = placeholder[target=arg428_1]
%arg429_1 : [num_users=1] = placeholder[target=arg429_1]
%arg430_1 : [num_users=1] = placeholder[target=arg430_1]
%arg431_1 : [num_users=1] = placeholder[target=arg431_1]
%arg432_1 : [num_users=1] = placeholder[target=arg432_1]
%arg433_1 : [num_users=1] = placeholder[target=arg433_1]
%arg434_1 : [num_users=1] = placeholder[target=arg434_1]
%arg435_1 : [num_users=1] = placeholder[target=arg435_1]
%arg436_1 : [num_users=1] = placeholder[target=arg436_1]
%arg437_1 : [num_users=3] = placeholder[target=arg437_1]
%arg438_1 : [num_users=3] = placeholder[target=arg438_1]
%arg439_1 : [num_users=1] = placeholder[target=arg439_1]
%arg440_1 : [num_users=1] = placeholder[target=arg440_1]
%arg441_1 : [num_users=1] = placeholder[target=arg441_1]
%arg442_1 : [num_users=1] = placeholder[target=arg442_1]
%arg443_1 : [num_users=1] = placeholder[target=arg443_1]
%arg444_1 : [num_users=1] = placeholder[target=arg444_1]
%arg445_1 : [num_users=1] = placeholder[target=arg445_1]
%arg446_1 : [num_users=1] = placeholder[target=arg446_1]
%arg447_1 : [num_users=1] = placeholder[target=arg447_1]
%arg448_1 : [num_users=1] = placeholder[target=arg448_1]
%arg449_1 : [num_users=1] = placeholder[target=arg449_1]
%arg450_1 : [num_users=1] = placeholder[target=arg450_1]
%arg451_1 : [num_users=1] = placeholder[target=arg451_1]
%arg452_1 : [num_users=1] = placeholder[target=arg452_1]
%arg453_1 : [num_users=1] = placeholder[target=arg453_1]
%arg454_1 : [num_users=1] = placeholder[target=arg454_1]
%arg455_1 : [num_users=1] = placeholder[target=arg455_1]
%arg456_1 : [num_users=1] = placeholder[target=arg456_1]
%arg457_1 : [num_users=1] = placeholder[target=arg457_1]
%arg458_1 : [num_users=1] = placeholder[target=arg458_1]
%arg459_1 : [num_users=1] = placeholder[target=arg459_1]
%arg460_1 : [num_users=1] = placeholder[target=arg460_1]
%arg461_1 : [num_users=3] = placeholder[target=arg461_1]
%arg462_1 : [num_users=3] = placeholder[target=arg462_1]
%arg463_1 : [num_users=1] = placeholder[target=arg463_1]
%arg464_1 : [num_users=1] = placeholder[target=arg464_1]
%arg465_1 : [num_users=1] = placeholder[target=arg465_1]
%arg466_1 : [num_users=1] = placeholder[target=arg466_1]
%arg467_1 : [num_users=1] = placeholder[target=arg467_1]
%arg468_1 : [num_users=1] = placeholder[target=arg468_1]
%arg469_1 : [num_users=1] = placeholder[target=arg469_1]
%arg470_1 : [num_users=1] = placeholder[target=arg470_1]
%arg471_1 : [num_users=1] = placeholder[target=arg471_1]
%arg472_1 : [num_users=1] = placeholder[target=arg472_1]
%arg473_1 : [num_users=1] = placeholder[target=arg473_1]
%arg474_1 : [num_users=1] = placeholder[target=arg474_1]
%arg475_1 : [num_users=1] = placeholder[target=arg475_1]
%arg476_1 : [num_users=1] = placeholder[target=arg476_1]
%arg477_1 : [num_users=1] = placeholder[target=arg477_1]
%arg478_1 : [num_users=1] = placeholder[target=arg478_1]
%arg479_1 : [num_users=1] = placeholder[target=arg479_1]
%arg480_1 : [num_users=1] = placeholder[target=arg480_1]
%arg481_1 : [num_users=1] = placeholder[target=arg481_1]
%arg482_1 : [num_users=1] = placeholder[target=arg482_1]
%arg483_1 : [num_users=1] = placeholder[target=arg483_1]
%arg484_1 : [num_users=1] = placeholder[target=arg484_1]
%arg485_1 : [num_users=3] = placeholder[target=arg485_1]
%arg486_1 : [num_users=3] = placeholder[target=arg486_1]
%arg487_1 : [num_users=1] = placeholder[target=arg487_1]
%arg488_1 : [num_users=1] = placeholder[target=arg488_1]
%arg489_1 : [num_users=1] = placeholder[target=arg489_1]
%arg490_1 : [num_users=1] = placeholder[target=arg490_1]
%arg491_1 : [num_users=1] = placeholder[target=arg491_1]
%arg492_1 : [num_users=1] = placeholder[target=arg492_1]
%arg493_1 : [num_users=1] = placeholder[target=arg493_1]
%arg494_1 : [num_users=1] = placeholder[target=arg494_1]
%arg495_1 : [num_users=1] = placeholder[target=arg495_1]
%arg496_1 : [num_users=1] = placeholder[target=arg496_1]
%arg497_1 : [num_users=1] = placeholder[target=arg497_1]
%arg498_1 : [num_users=1] = placeholder[target=arg498_1]
%arg499_1 : [num_users=1] = placeholder[target=arg499_1]
%arg500_1 : [num_users=1] = placeholder[target=arg500_1]
%arg501_1 : [num_users=1] = placeholder[target=arg501_1]
%arg502_1 : [num_users=1] = placeholder[target=arg502_1]
%arg503_1 : [num_users=1] = placeholder[target=arg503_1]
%arg504_1 : [num_users=1] = placeholder[target=arg504_1]
%arg505_1 : [num_users=1] = placeholder[target=arg505_1]
%arg506_1 : [num_users=1] = placeholder[target=arg506_1]
%arg507_1 : [num_users=1] = placeholder[target=arg507_1]
%arg508_1 : [num_users=1] = placeholder[target=arg508_1]
%arg509_1 : [num_users=3] = placeholder[target=arg509_1]
%arg510_1 : [num_users=3] = placeholder[target=arg510_1]
%arg511_1 : [num_users=1] = placeholder[target=arg511_1]
%arg512_1 : [num_users=1] = placeholder[target=arg512_1]
%arg513_1 : [num_users=1] = placeholder[target=arg513_1]
%arg514_1 : [num_users=1] = placeholder[target=arg514_1]
%arg515_1 : [num_users=1] = placeholder[target=arg515_1]
%arg516_1 : [num_users=1] = placeholder[target=arg516_1]
%arg517_1 : [num_users=1] = placeholder[target=arg517_1]
%arg518_1 : [num_users=1] = placeholder[target=arg518_1]
%arg519_1 : [num_users=1] = placeholder[target=arg519_1]
%arg520_1 : [num_users=1] = placeholder[target=arg520_1]
%arg521_1 : [num_users=1] = placeholder[target=arg521_1]
%arg522_1 : [num_users=1] = placeholder[target=arg522_1]
%arg523_1 : [num_users=1] = placeholder[target=arg523_1]
%arg524_1 : [num_users=1] = placeholder[target=arg524_1]
%arg525_1 : [num_users=1] = placeholder[target=arg525_1]
%arg526_1 : [num_users=1] = placeholder[target=arg526_1]
%arg527_1 : [num_users=1] = placeholder[target=arg527_1]
%arg528_1 : [num_users=1] = placeholder[target=arg528_1]
%arg529_1 : [num_users=1] = placeholder[target=arg529_1]
%arg530_1 : [num_users=1] = placeholder[target=arg530_1]
%arg531_1 : [num_users=1] = placeholder[target=arg531_1]
%arg532_1 : [num_users=1] = placeholder[target=arg532_1]
%arg533_1 : [num_users=3] = placeholder[target=arg533_1]
%arg534_1 : [num_users=3] = placeholder[target=arg534_1]
%arg535_1 : [num_users=1] = placeholder[target=arg535_1]
%arg536_1 : [num_users=1] = placeholder[target=arg536_1]
%arg537_1 : [num_users=1] = placeholder[target=arg537_1]
%arg538_1 : [num_users=1] = placeholder[target=arg538_1]
%arg539_1 : [num_users=1] = placeholder[target=arg539_1]
%arg540_1 : [num_users=1] = placeholder[target=arg540_1]
%arg541_1 : [num_users=1] = placeholder[target=arg541_1]
%arg542_1 : [num_users=1] = placeholder[target=arg542_1]
%arg543_1 : [num_users=1] = placeholder[target=arg543_1]
%arg544_1 : [num_users=1] = placeholder[target=arg544_1]
%arg545_1 : [num_users=1] = placeholder[target=arg545_1]
%arg546_1 : [num_users=1] = placeholder[target=arg546_1]
%arg547_1 : [num_users=1] = placeholder[target=arg547_1]
%arg548_1 : [num_users=1] = placeholder[target=arg548_1]
%arg549_1 : [num_users=1] = placeholder[target=arg549_1]
%arg550_1 : [num_users=1] = placeholder[target=arg550_1]
%arg551_1 : [num_users=1] = placeholder[target=arg551_1]
%arg552_1 : [num_users=1] = placeholder[target=arg552_1]
%arg553_1 : [num_users=1] = placeholder[target=arg553_1]
%arg554_1 : [num_users=1] = placeholder[target=arg554_1]
%arg555_1 : [num_users=1] = placeholder[target=arg555_1]
%arg556_1 : [num_users=1] = placeholder[target=arg556_1]
%arg557_1 : [num_users=3] = placeholder[target=arg557_1]
%arg558_1 : [num_users=3] = placeholder[target=arg558_1]
%arg559_1 : [num_users=1] = placeholder[target=arg559_1]
%arg560_1 : [num_users=1] = placeholder[target=arg560_1]
%arg561_1 : [num_users=1] = placeholder[target=arg561_1]
%arg562_1 : [num_users=1] = placeholder[target=arg562_1]
%arg563_1 : [num_users=1] = placeholder[target=arg563_1]
%arg564_1 : [num_users=1] = placeholder[target=arg564_1]
%arg565_1 : [num_users=1] = placeholder[target=arg565_1]
%arg566_1 : [num_users=1] = placeholder[target=arg566_1]
%arg567_1 : [num_users=1] = placeholder[target=arg567_1]
%arg568_1 : [num_users=1] = placeholder[target=arg568_1]
%arg569_1 : [num_users=1] = placeholder[target=arg569_1]
%arg570_1 : [num_users=1] = placeholder[target=arg570_1]
%arg571_1 : [num_users=1] = placeholder[target=arg571_1]
%arg572_1 : [num_users=1] = placeholder[target=arg572_1]
%arg573_1 : [num_users=1] = placeholder[target=arg573_1]
%arg574_1 : [num_users=1] = placeholder[target=arg574_1]
%arg575_1 : [num_users=1] = placeholder[target=arg575_1]
%arg576_1 : [num_users=1] = placeholder[target=arg576_1]
%arg577_1 : [num_users=1] = placeholder[target=arg577_1]
%arg578_1 : [num_users=1] = placeholder[target=arg578_1]
%arg579_1 : [num_users=1] = placeholder[target=arg579_1]
%arg580_1 : [num_users=1] = placeholder[target=arg580_1]
%arg581_1 : [num_users=3] = placeholder[target=arg581_1]
%arg582_1 : [num_users=3] = placeholder[target=arg582_1]
%arg583_1 : [num_users=1] = placeholder[target=arg583_1]
%arg584_1 : [num_users=1] = placeholder[target=arg584_1]
%arg585_1 : [num_users=1] = placeholder[target=arg585_1]
%arg586_1 : [num_users=1] = placeholder[target=arg586_1]
%arg587_1 : [num_users=1] = placeholder[target=arg587_1]
%arg588_1 : [num_users=1] = placeholder[target=arg588_1]
%arg589_1 : [num_users=1] = placeholder[target=arg589_1]
%arg590_1 : [num_users=1] = placeholder[target=arg590_1]
%arg591_1 : [num_users=1] = placeholder[target=arg591_1]
%arg592_1 : [num_users=1] = placeholder[target=arg592_1]
%arg593_1 : [num_users=1] = placeholder[target=arg593_1]
%arg594_1 : [num_users=1] = placeholder[target=arg594_1]
%arg595_1 : [num_users=1] = placeholder[target=arg595_1]
%arg596_1 : [num_users=1] = placeholder[target=arg596_1]
%arg597_1 : [num_users=1] = placeholder[target=arg597_1]
%arg598_1 : [num_users=1] = placeholder[target=arg598_1]
%arg599_1 : [num_users=1] = placeholder[target=arg599_1]
%arg600_1 : [num_users=1] = placeholder[target=arg600_1]
%arg601_1 : [num_users=1] = placeholder[target=arg601_1]
%arg602_1 : [num_users=1] = placeholder[target=arg602_1]
%arg603_1 : [num_users=1] = placeholder[target=arg603_1]
%arg604_1 : [num_users=1] = placeholder[target=arg604_1]
%arg605_1 : [num_users=3] = placeholder[target=arg605_1]
%arg606_1 : [num_users=3] = placeholder[target=arg606_1]
%arg607_1 : [num_users=1] = placeholder[target=arg607_1]
%arg608_1 : [num_users=1] = placeholder[target=arg608_1]
%arg609_1 : [num_users=1] = placeholder[target=arg609_1]
%arg610_1 : [num_users=1] = placeholder[target=arg610_1]
%arg611_1 : [num_users=1] = placeholder[target=arg611_1]
%arg612_1 : [num_users=1] = placeholder[target=arg612_1]
%arg613_1 : [num_users=1] = placeholder[target=arg613_1]
%arg614_1 : [num_users=1] = placeholder[target=arg614_1]
%arg615_1 : [num_users=1] = placeholder[target=arg615_1]
%arg616_1 : [num_users=1] = placeholder[target=arg616_1]
%arg617_1 : [num_users=1] = placeholder[target=arg617_1]
%arg618_1 : [num_users=1] = placeholder[target=arg618_1]
%arg619_1 : [num_users=1] = placeholder[target=arg619_1]
%arg620_1 : [num_users=1] = placeholder[target=arg620_1]
%arg621_1 : [num_users=1] = placeholder[target=arg621_1]
%arg622_1 : [num_users=1] = placeholder[target=arg622_1]
%arg623_1 : [num_users=1] = placeholder[target=arg623_1]
%arg624_1 : [num_users=1] = placeholder[target=arg624_1]
%arg625_1 : [num_users=1] = placeholder[target=arg625_1]
%arg626_1 : [num_users=1] = placeholder[target=arg626_1]
%arg627_1 : [num_users=1] = placeholder[target=arg627_1]
%arg628_1 : [num_users=1] = placeholder[target=arg628_1]
%arg629_1 : [num_users=3] = placeholder[target=arg629_1]
%arg630_1 : [num_users=3] = placeholder[target=arg630_1]
%arg631_1 : [num_users=1] = placeholder[target=arg631_1]
%arg632_1 : [num_users=1] = placeholder[target=arg632_1]
%arg633_1 : [num_users=1] = placeholder[target=arg633_1]
%arg634_1 : [num_users=1] = placeholder[target=arg634_1]
%arg635_1 : [num_users=1] = placeholder[target=arg635_1]
%arg636_1 : [num_users=1] = placeholder[target=arg636_1]
%arg637_1 : [num_users=1] = placeholder[target=arg637_1]
%arg638_1 : [num_users=1] = placeholder[target=arg638_1]
%arg639_1 : [num_users=1] = placeholder[target=arg639_1]
%arg640_1 : [num_users=1] = placeholder[target=arg640_1]
%arg641_1 : [num_users=1] = placeholder[target=arg641_1]
%arg642_1 : [num_users=1] = placeholder[target=arg642_1]
%arg643_1 : [num_users=1] = placeholder[target=arg643_1]
%arg644_1 : [num_users=1] = placeholder[target=arg644_1]
%arg645_1 : [num_users=1] = placeholder[target=arg645_1]
%arg646_1 : [num_users=1] = placeholder[target=arg646_1]
%arg647_1 : [num_users=1] = placeholder[target=arg647_1]
%arg648_1 : [num_users=1] = placeholder[target=arg648_1]
%arg649_1 : [num_users=1] = placeholder[target=arg649_1]
%arg650_1 : [num_users=1] = placeholder[target=arg650_1]
%arg651_1 : [num_users=1] = placeholder[target=arg651_1]
%arg652_1 : [num_users=1] = placeholder[target=arg652_1]
%arg653_1 : [num_users=3] = placeholder[target=arg653_1]
%arg654_1 : [num_users=3] = placeholder[target=arg654_1]
%arg655_1 : [num_users=1] = placeholder[target=arg655_1]
%arg656_1 : [num_users=1] = placeholder[target=arg656_1]
%arg657_1 : [num_users=1] = placeholder[target=arg657_1]
%arg658_1 : [num_users=1] = placeholder[target=arg658_1]
%arg659_1 : [num_users=1] = placeholder[target=arg659_1]
%arg660_1 : [num_users=1] = placeholder[target=arg660_1]
%arg661_1 : [num_users=1] = placeholder[target=arg661_1]
%arg662_1 : [num_users=1] = placeholder[target=arg662_1]
%arg663_1 : [num_users=1] = placeholder[target=arg663_1]
%arg664_1 : [num_users=1] = placeholder[target=arg664_1]
%arg665_1 : [num_users=1] = placeholder[target=arg665_1]
%arg666_1 : [num_users=1] = placeholder[target=arg666_1]
%arg667_1 : [num_users=1] = placeholder[target=arg667_1]
%arg668_1 : [num_users=1] = placeholder[target=arg668_1]
%arg669_1 : [num_users=1] = placeholder[target=arg669_1]
%arg670_1 : [num_users=1] = placeholder[target=arg670_1]
%arg671_1 : [num_users=1] = placeholder[target=arg671_1]
%arg672_1 : [num_users=1] = placeholder[target=arg672_1]
%arg673_1 : [num_users=1] = placeholder[target=arg673_1]
%arg674_1 : [num_users=1] = placeholder[target=arg674_1]
%arg675_1 : [num_users=1] = placeholder[target=arg675_1]
%arg676_1 : [num_users=1] = placeholder[target=arg676_1]
%arg677_1 : [num_users=3] = placeholder[target=arg677_1]
%arg678_1 : [num_users=3] = placeholder[target=arg678_1]
%arg679_1 : [num_users=1] = placeholder[target=arg679_1]
%arg680_1 : [num_users=1] = placeholder[target=arg680_1]
%arg681_1 : [num_users=1] = placeholder[target=arg681_1]
%arg682_1 : [num_users=1] = placeholder[target=arg682_1]
%arg683_1 : [num_users=1] = placeholder[target=arg683_1]
%arg684_1 : [num_users=1] = placeholder[target=arg684_1]
%arg685_1 : [num_users=1] = placeholder[target=arg685_1]
%arg686_1 : [num_users=1] = placeholder[target=arg686_1]
%arg687_1 : [num_users=1] = placeholder[target=arg687_1]
%arg688_1 : [num_users=1] = placeholder[target=arg688_1]
%arg689_1 : [num_users=1] = placeholder[target=arg689_1]
%arg690_1 : [num_users=1] = placeholder[target=arg690_1]
%arg691_1 : [num_users=1] = placeholder[target=arg691_1]
%arg692_1 : [num_users=1] = placeholder[target=arg692_1]
%arg693_1 : [num_users=1] = placeholder[target=arg693_1]
%arg694_1 : [num_users=1] = placeholder[target=arg694_1]
%arg695_1 : [num_users=1] = placeholder[target=arg695_1]
%arg696_1 : [num_users=1] = placeholder[target=arg696_1]
%arg697_1 : [num_users=1] = placeholder[target=arg697_1]
%arg698_1 : [num_users=1] = placeholder[target=arg698_1]
%arg699_1 : [num_users=1] = placeholder[target=arg699_1]
%arg700_1 : [num_users=1] = placeholder[target=arg700_1]
%arg701_1 : [num_users=3] = placeholder[target=arg701_1]
%arg702_1 : [num_users=3] = placeholder[target=arg702_1]
%arg703_1 : [num_users=1] = placeholder[target=arg703_1]
%arg704_1 : [num_users=1] = placeholder[target=arg704_1]
%arg705_1 : [num_users=1] = placeholder[target=arg705_1]
%arg706_1 : [num_users=1] = placeholder[target=arg706_1]
%arg707_1 : [num_users=1] = placeholder[target=arg707_1]
%arg708_1 : [num_users=1] = placeholder[target=arg708_1]
%arg709_1 : [num_users=1] = placeholder[target=arg709_1]
%arg710_1 : [num_users=1] = placeholder[target=arg710_1]
%arg711_1 : [num_users=1] = placeholder[target=arg711_1]
%arg712_1 : [num_users=1] = placeholder[target=arg712_1]
%arg713_1 : [num_users=1] = placeholder[target=arg713_1]
%arg714_1 : [num_users=1] = placeholder[target=arg714_1]
%arg715_1 : [num_users=1] = placeholder[target=arg715_1]
%arg716_1 : [num_users=1] = placeholder[target=arg716_1]
%arg717_1 : [num_users=1] = placeholder[target=arg717_1]
%arg718_1 : [num_users=1] = placeholder[target=arg718_1]
%arg719_1 : [num_users=1] = placeholder[target=arg719_1]
%arg720_1 : [num_users=1] = placeholder[target=arg720_1]
%arg721_1 : [num_users=1] = placeholder[target=arg721_1]
%arg722_1 : [num_users=1] = placeholder[target=arg722_1]
%arg723_1 : [num_users=1] = placeholder[target=arg723_1]
%arg724_1 : [num_users=1] = placeholder[target=arg724_1]
%arg725_1 : [num_users=3] = placeholder[target=arg725_1]
%arg726_1 : [num_users=3] = placeholder[target=arg726_1]
%arg727_1 : [num_users=1] = placeholder[target=arg727_1]
%arg728_1 : [num_users=1] = placeholder[target=arg728_1]
%arg729_1 : [num_users=1] = placeholder[target=arg729_1]
%arg730_1 : [num_users=1] = placeholder[target=arg730_1]
%arg731_1 : [num_users=1] = placeholder[target=arg731_1]
%arg732_1 : [num_users=1] = placeholder[target=arg732_1]
%arg733_1 : [num_users=1] = placeholder[target=arg733_1]
%arg734_1 : [num_users=1] = placeholder[target=arg734_1]
%arg735_1 : [num_users=1] = placeholder[target=arg735_1]
%arg736_1 : [num_users=1] = placeholder[target=arg736_1]
%arg737_1 : [num_users=1] = placeholder[target=arg737_1]
%arg738_1 : [num_users=1] = placeholder[target=arg738_1]
%arg739_1 : [num_users=1] = placeholder[target=arg739_1]
%arg740_1 : [num_users=1] = placeholder[target=arg740_1]
%arg741_1 : [num_users=1] = placeholder[target=arg741_1]
%arg742_1 : [num_users=1] = placeholder[target=arg742_1]
%arg743_1 : [num_users=1] = placeholder[target=arg743_1]
%arg744_1 : [num_users=1] = placeholder[target=arg744_1]
%arg745_1 : [num_users=1] = placeholder[target=arg745_1]
%arg746_1 : [num_users=1] = placeholder[target=arg746_1]
%arg747_1 : [num_users=1] = placeholder[target=arg747_1]
%arg748_1 : [num_users=1] = placeholder[target=arg748_1]
%arg749_1 : [num_users=3] = placeholder[target=arg749_1]
%arg750_1 : [num_users=3] = placeholder[target=arg750_1]
%arg751_1 : [num_users=1] = placeholder[target=arg751_1]
%arg752_1 : [num_users=1] = placeholder[target=arg752_1]
%arg753_1 : [num_users=1] = placeholder[target=arg753_1]
%arg754_1 : [num_users=1] = placeholder[target=arg754_1]
%arg755_1 : [num_users=1] = placeholder[target=arg755_1]
%arg756_1 : [num_users=1] = placeholder[target=arg756_1]
%arg757_1 : [num_users=1] = placeholder[target=arg757_1]
%arg758_1 : [num_users=1] = placeholder[target=arg758_1]
%arg759_1 : [num_users=1] = placeholder[target=arg759_1]
%arg760_1 : [num_users=1] = placeholder[target=arg760_1]
%arg761_1 : [num_users=1] = placeholder[target=arg761_1]
%arg762_1 : [num_users=1] = placeholder[target=arg762_1]
%arg763_1 : [num_users=1] = placeholder[target=arg763_1]
%arg764_1 : [num_users=1] = placeholder[target=arg764_1]
%arg765_1 : [num_users=1] = placeholder[target=arg765_1]
%arg766_1 : [num_users=1] = placeholder[target=arg766_1]
%arg767_1 : [num_users=1] = placeholder[target=arg767_1]
%arg768_1 : [num_users=1] = placeholder[target=arg768_1]
%arg769_1 : [num_users=1] = placeholder[target=arg769_1]
%arg770_1 : [num_users=1] = placeholder[target=arg770_1]
%arg771_1 : [num_users=1] = placeholder[target=arg771_1]
%arg772_1 : [num_users=1] = placeholder[target=arg772_1]
%arg773_1 : [num_users=3] = placeholder[target=arg773_1]
%arg774_1 : [num_users=3] = placeholder[target=arg774_1]
%arg775_1 : [num_users=1] = placeholder[target=arg775_1]
%arg776_1 : [num_users=1] = placeholder[target=arg776_1]
%arg777_1 : [num_users=1] = placeholder[target=arg777_1]
%arg778_1 : [num_users=1] = placeholder[target=arg778_1]
%arg779_1 : [num_users=1] = placeholder[target=arg779_1]
%arg780_1 : [num_users=1] = placeholder[target=arg780_1]
%arg781_1 : [num_users=1] = placeholder[target=arg781_1]
%arg782_1 : [num_users=1] = placeholder[target=arg782_1]
%arg783_1 : [num_users=1] = placeholder[target=arg783_1]
%arg784_1 : [num_users=1] = placeholder[target=arg784_1]
%arg785_1 : [num_users=1] = placeholder[target=arg785_1]
%arg786_1 : [num_users=1] = placeholder[target=arg786_1]
%arg787_1 : [num_users=1] = placeholder[target=arg787_1]
%arg788_1 : [num_users=1] = placeholder[target=arg788_1]
%arg789_1 : [num_users=1] = placeholder[target=arg789_1]
%arg790_1 : [num_users=1] = placeholder[target=arg790_1]
%arg791_1 : [num_users=1] = placeholder[target=arg791_1]
%arg792_1 : [num_users=1] = placeholder[target=arg792_1]
%arg793_1 : [num_users=1] = placeholder[target=arg793_1]
%arg794_1 : [num_users=1] = placeholder[target=arg794_1]
%arg795_1 : [num_users=1] = placeholder[target=arg795_1]
%arg796_1 : [num_users=1] = placeholder[target=arg796_1]
%arg797_1 : [num_users=3] = placeholder[target=arg797_1]
%arg798_1 : [num_users=3] = placeholder[target=arg798_1]
%arg799_1 : [num_users=1] = placeholder[target=arg799_1]
%arg800_1 : [num_users=1] = placeholder[target=arg800_1]
%arg801_1 : [num_users=1] = placeholder[target=arg801_1]
%arg802_1 : [num_users=1] = placeholder[target=arg802_1]
%arg803_1 : [num_users=1] = placeholder[target=arg803_1]
%arg804_1 : [num_users=1] = placeholder[target=arg804_1]
%arg805_1 : [num_users=1] = placeholder[target=arg805_1]
%arg806_1 : [num_users=1] = placeholder[target=arg806_1]
%arg807_1 : [num_users=1] = placeholder[target=arg807_1]
%arg808_1 : [num_users=1] = placeholder[target=arg808_1]
%arg809_1 : [num_users=1] = placeholder[target=arg809_1]
%arg810_1 : [num_users=1] = placeholder[target=arg810_1]
%arg811_1 : [num_users=1] = placeholder[target=arg811_1]
%arg812_1 : [num_users=1] = placeholder[target=arg812_1]
%arg813_1 : [num_users=1] = placeholder[target=arg813_1]
%arg814_1 : [num_users=1] = placeholder[target=arg814_1]
%arg815_1 : [num_users=1] = placeholder[target=arg815_1]
%arg816_1 : [num_users=1] = placeholder[target=arg816_1]
%arg817_1 : [num_users=1] = placeholder[target=arg817_1]
%arg818_1 : [num_users=1] = placeholder[target=arg818_1]
%arg819_1 : [num_users=1] = placeholder[target=arg819_1]
%arg820_1 : [num_users=1] = placeholder[target=arg820_1]
%arg821_1 : [num_users=3] = placeholder[target=arg821_1]
%arg822_1 : [num_users=3] = placeholder[target=arg822_1]
%arg823_1 : [num_users=1] = placeholder[target=arg823_1]
%arg824_1 : [num_users=1] = placeholder[target=arg824_1]
%arg825_1 : [num_users=1] = placeholder[target=arg825_1]
%arg826_1 : [num_users=1] = placeholder[target=arg826_1]
%arg827_1 : [num_users=1] = placeholder[target=arg827_1]
%arg828_1 : [num_users=1] = placeholder[target=arg828_1]
%arg829_1 : [num_users=1] = placeholder[target=arg829_1]
%arg830_1 : [num_users=1] = placeholder[target=arg830_1]
%arg831_1 : [num_users=1] = placeholder[target=arg831_1]
%arg832_1 : [num_users=1] = placeholder[target=arg832_1]
%arg833_1 : [num_users=1] = placeholder[target=arg833_1]
%arg834_1 : [num_users=1] = placeholder[target=arg834_1]
%arg835_1 : [num_users=1] = placeholder[target=arg835_1]
%arg836_1 : [num_users=1] = placeholder[target=arg836_1]
%arg837_1 : [num_users=1] = placeholder[target=arg837_1]
%arg838_1 : [num_users=1] = placeholder[target=arg838_1]
%arg839_1 : [num_users=1] = placeholder[target=arg839_1]
%arg840_1 : [num_users=98] = placeholder[target=arg840_1]
%embedding : [num_users=3] = call_function[target=torch.ops.aten.embedding.default](args = (%arg0_1, %arg839_1), kwargs = {})
%index : [num_users=32] = call_function[target=torch.ops.aten.index.Tensor](args = (%arg66_1, [%arg840_1]), kwargs = {})
%index_1 : [num_users=32] = call_function[target=torch.ops.aten.index.Tensor](args = (%arg67_1, [%arg840_1]), kwargs = {})
%mul : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%embedding, %embedding), kwargs = {})
%mean : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul, [-1], True), kwargs = {})
%add : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean, 1e-06), kwargs = {})
%rsqrt : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add,), kwargs = {})
%mul_1 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%embedding, %rsqrt), kwargs = {})
%mul_2 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_1, %arg1_1), kwargs = {})
%choose_qparams_per_token_asymmetric : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_2, torch.int8), kwargs = {})
%getitem : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric, 0), kwargs = {})
%getitem_1 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric, 1), kwargs = {})
%quantize_per_token : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_2, %getitem, %getitem_1, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token, %getitem, %getitem_1, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg68_1, %arg69_1, %arg70_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group,), kwargs = {})
%view : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token, [1, 4096]), kwargs = {})
%mm : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view, %t), kwargs = {})
%view_1 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_1 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_2, torch.int8), kwargs = {})
%getitem_2 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_1, 0), kwargs = {})
%getitem_3 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_1, 1), kwargs = {})
%quantize_per_token_1 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_2, %getitem_2, %getitem_3, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_1 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_1, %getitem_2, %getitem_3, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_1 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg71_1, %arg72_1, %arg73_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_1 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_1,), kwargs = {})
%view_2 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_1, [1, 4096]), kwargs = {})
%mm_1 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_2, %t_1), kwargs = {})
%view_3 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_1, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_2 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_2, torch.int8), kwargs = {})
%getitem_4 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_2, 0), kwargs = {})
%getitem_5 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_2, 1), kwargs = {})
%quantize_per_token_2 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_2, %getitem_4, %getitem_5, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_2 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_2, %getitem_4, %getitem_5, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_2 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg74_1, %arg75_1, %arg76_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_2 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_2,), kwargs = {})
%view_4 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_2, [1, 4096]), kwargs = {})
%mm_2 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_4, %t_2), kwargs = {})
%view_5 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_2, [1, 1, 4096]), kwargs = {})
%view_6 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_1, [1, 1, 32, 128]), kwargs = {})
%view_7 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_3, [1, 1, 32, 128]), kwargs = {})
%view_8 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_5, [1, 1, 32, 128]), kwargs = {})
%view_9 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_6, [1, 1, 32, -1, 2]), kwargs = {})
%unbind : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_9, -1), kwargs = {})
%getitem_6 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind, 0), kwargs = {})
%getitem_7 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind, 1), kwargs = {})
%view_10 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_7, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_1 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_10, -1), kwargs = {})
%getitem_8 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_1, 0), kwargs = {})
%getitem_9 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_1, 1), kwargs = {})
%view_11 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_12 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_3 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_6, %view_11), kwargs = {})
%mul_4 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_7, %view_12), kwargs = {})
%sub : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_3, %mul_4), kwargs = {})
%mul_5 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_6, %view_12), kwargs = {})
%mul_6 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_7, %view_11), kwargs = {})
%add_1 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_5, %mul_6), kwargs = {})
%mul_7 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_8, %view_11), kwargs = {})
%mul_8 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_9, %view_12), kwargs = {})
%sub_1 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_7, %mul_8), kwargs = {})
%mul_9 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_8, %view_12), kwargs = {})
%mul_10 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_9, %view_11), kwargs = {})
%add_2 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_9, %mul_10), kwargs = {})
%stack : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub, %add_1], -1), kwargs = {})
%view_13 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack, [1, 1, 32, 128]), kwargs = {})
%stack_1 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_1, %add_2], -1), kwargs = {})
%view_14 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_1, [1, 1, 32, 128]), kwargs = {})
%transpose : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_13, 1, 2), kwargs = {})
%transpose_1 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_14, 1, 2), kwargs = {})
%transpose_2 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_8, 1, 2), kwargs = {})
%slice_1 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg77_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_2 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_1, 1, 0, 9223372036854775807), kwargs = {})
%view_15 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_1, [32, 1, 128]), kwargs = {})
%index_put : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_2, [None, None, %arg840_1], %view_15), kwargs = {})
%slice_3 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg77_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_3, %index_put, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_1 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg77_1, %slice_scatter, 0, 0, 9223372036854775807), kwargs = {})
%slice_6 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg78_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_7 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_6, 1, 0, 9223372036854775807), kwargs = {})
%view_16 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_2, [32, 1, 128]), kwargs = {})
%index_put_1 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_7, [None, None, %arg840_1], %view_16), kwargs = {})
%slice_8 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg78_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_2 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_8, %index_put_1, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_3 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg78_1, %slice_scatter_2, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg79_1, 0), kwargs = {})
%unsqueeze_1 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze, 1), kwargs = {})
%index_2 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_1, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_3 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_1, 2), kwargs = {})
%expand_1 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_3, [1, 32, 1, 128, 128]), kwargs = {})
%clone : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_1,), kwargs = {memory_format: torch.contiguous_format})
%view_17 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_5 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_3, 2), kwargs = {})
%expand_3 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_5, [1, 32, 1, 128, 128]), kwargs = {})
%clone_1 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_3,), kwargs = {memory_format: torch.contiguous_format})
%view_18 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_1, [1, 32, 128, 128]), kwargs = {})
%zeros_like : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_2,), kwargs = {dtype: torch.float32})
%logical_not : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_2,), kwargs = {})
%masked_fill : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like, %logical_not, -inf), kwargs = {})
%mul_11 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose, 0.29730177875068026), kwargs = {})
%transpose_3 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_17, -2, -1), kwargs = {})
%mul_12 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_3, 0.29730177875068026), kwargs = {})
%expand_4 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_11, [1, 32, 1, 128]), kwargs = {})
%view_19 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_4, [32, 1, 128]), kwargs = {})
%expand_5 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_12, [1, 32, 128, 128]), kwargs = {})
%view_20 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_5, [32, 128, 128]), kwargs = {})
%bmm : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_19, %view_20), kwargs = {})
%view_21 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm, [1, 32, 1, 128]), kwargs = {})
%add_3 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_21, %masked_fill), kwargs = {})
%_softmax : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_3, -1, False), kwargs = {})
%expand_6 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax, [1, 32, 1, 128]), kwargs = {})
%view_22 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_6, [32, 1, 128]), kwargs = {})
%expand_7 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_18, [1, 32, 128, 128]), kwargs = {})
%view_23 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_7, [32, 128, 128]), kwargs = {})
%bmm_1 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_22, %view_23), kwargs = {})
%view_24 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_1, [1, 32, 1, 128]), kwargs = {})
%transpose_4 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_24, 1, 2), kwargs = {})
%view_25 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_4, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_3 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_25, torch.int8), kwargs = {})
%getitem_10 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_3, 0), kwargs = {})
%getitem_11 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_3, 1), kwargs = {})
%quantize_per_token_3 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_25, %getitem_10, %getitem_11, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_3 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_3, %getitem_10, %getitem_11, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_3 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg80_1, %arg81_1, %arg82_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_3 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_3,), kwargs = {})
%view_26 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_3, [1, 4096]), kwargs = {})
%mm_3 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_26, %t_3), kwargs = {})
%view_27 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_3, [1, 1, 4096]), kwargs = {})
%add_4 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%embedding, %view_27), kwargs = {})
%mul_13 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_4, %add_4), kwargs = {})
%mean_1 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_13, [-1], True), kwargs = {})
%add_5 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_1, 1e-06), kwargs = {})
%rsqrt_1 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_5,), kwargs = {})
%mul_14 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_4, %rsqrt_1), kwargs = {})
%mul_15 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_14, %arg2_1), kwargs = {})
%choose_qparams_per_token_asymmetric_4 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_15, torch.int8), kwargs = {})
%getitem_12 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_4, 0), kwargs = {})
%getitem_13 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_4, 1), kwargs = {})
%quantize_per_token_4 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_15, %getitem_12, %getitem_13, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_4 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_4, %getitem_12, %getitem_13, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_4 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg83_1, %arg84_1, %arg85_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_4 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_4,), kwargs = {})
%view_28 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_4, [1, 4096]), kwargs = {})
%mm_4 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_28, %t_4), kwargs = {})
%view_29 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_4, [1, 1, 11008]), kwargs = {})
%silu : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_29,), kwargs = {})
%choose_qparams_per_token_asymmetric_5 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_15, torch.int8), kwargs = {})
%getitem_14 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_5, 0), kwargs = {})
%getitem_15 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_5, 1), kwargs = {})
%quantize_per_token_5 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_15, %getitem_14, %getitem_15, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_5 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_5, %getitem_14, %getitem_15, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_5 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg86_1, %arg87_1, %arg88_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_5 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_5,), kwargs = {})
%view_30 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_5, [1, 4096]), kwargs = {})
%mm_5 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_30, %t_5), kwargs = {})
%view_31 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_5, [1, 1, 11008]), kwargs = {})
%mul_16 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu, %view_31), kwargs = {})
%choose_qparams_per_token_asymmetric_6 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_16, torch.int8), kwargs = {})
%getitem_16 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_6, 0), kwargs = {})
%getitem_17 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_6, 1), kwargs = {})
%quantize_per_token_6 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_16, %getitem_16, %getitem_17, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_6 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_6, %getitem_16, %getitem_17, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_6 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg89_1, %arg90_1, %arg91_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_6 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_6,), kwargs = {})
%view_32 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_6, [1, 11008]), kwargs = {})
%mm_6 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_32, %t_6), kwargs = {})
%view_33 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_6, [1, 1, 4096]), kwargs = {})
%add_6 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_4, %view_33), kwargs = {})
%mul_17 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_6, %add_6), kwargs = {})
%mean_2 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_17, [-1], True), kwargs = {})
%add_7 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_2, 1e-06), kwargs = {})
%rsqrt_2 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_7,), kwargs = {})
%mul_18 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_6, %rsqrt_2), kwargs = {})
%mul_19 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_18, %arg3_1), kwargs = {})
%choose_qparams_per_token_asymmetric_7 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_19, torch.int8), kwargs = {})
%getitem_18 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_7, 0), kwargs = {})
%getitem_19 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_7, 1), kwargs = {})
%quantize_per_token_7 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_19, %getitem_18, %getitem_19, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_7 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_7, %getitem_18, %getitem_19, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_7 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg92_1, %arg93_1, %arg94_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_7 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_7,), kwargs = {})
%view_34 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_7, [1, 4096]), kwargs = {})
%mm_7 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_34, %t_7), kwargs = {})
%view_35 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_7, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_8 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_19, torch.int8), kwargs = {})
%getitem_20 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_8, 0), kwargs = {})
%getitem_21 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_8, 1), kwargs = {})
%quantize_per_token_8 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_19, %getitem_20, %getitem_21, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_8 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_8, %getitem_20, %getitem_21, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_8 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg95_1, %arg96_1, %arg97_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_8 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_8,), kwargs = {})
%view_36 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_8, [1, 4096]), kwargs = {})
%mm_8 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_36, %t_8), kwargs = {})
%view_37 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_8, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_9 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_19, torch.int8), kwargs = {})
%getitem_22 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_9, 0), kwargs = {})
%getitem_23 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_9, 1), kwargs = {})
%quantize_per_token_9 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_19, %getitem_22, %getitem_23, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_9 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_9, %getitem_22, %getitem_23, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_9 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg98_1, %arg99_1, %arg100_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_9 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_9,), kwargs = {})
%view_38 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_9, [1, 4096]), kwargs = {})
%mm_9 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_38, %t_9), kwargs = {})
%view_39 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_9, [1, 1, 4096]), kwargs = {})
%view_40 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_35, [1, 1, 32, 128]), kwargs = {})
%view_41 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_37, [1, 1, 32, 128]), kwargs = {})
%view_42 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_39, [1, 1, 32, 128]), kwargs = {})
%view_43 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_40, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_2 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_43, -1), kwargs = {})
%getitem_24 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_2, 0), kwargs = {})
%getitem_25 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_2, 1), kwargs = {})
%view_44 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_41, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_3 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_44, -1), kwargs = {})
%getitem_26 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_3, 0), kwargs = {})
%getitem_27 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_3, 1), kwargs = {})
%view_45 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_46 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_20 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_24, %view_45), kwargs = {})
%mul_21 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_25, %view_46), kwargs = {})
%sub_2 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_20, %mul_21), kwargs = {})
%mul_22 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_24, %view_46), kwargs = {})
%mul_23 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_25, %view_45), kwargs = {})
%add_8 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_22, %mul_23), kwargs = {})
%mul_24 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_26, %view_45), kwargs = {})
%mul_25 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_27, %view_46), kwargs = {})
%sub_3 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_24, %mul_25), kwargs = {})
%mul_26 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_26, %view_46), kwargs = {})
%mul_27 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_27, %view_45), kwargs = {})
%add_9 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_26, %mul_27), kwargs = {})
%stack_2 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_2, %add_8], -1), kwargs = {})
%view_47 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_2, [1, 1, 32, 128]), kwargs = {})
%stack_3 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_3, %add_9], -1), kwargs = {})
%view_48 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_3, [1, 1, 32, 128]), kwargs = {})
%transpose_5 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_47, 1, 2), kwargs = {})
%transpose_6 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_48, 1, 2), kwargs = {})
%transpose_7 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_42, 1, 2), kwargs = {})
%slice_11 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg101_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_12 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_11, 1, 0, 9223372036854775807), kwargs = {})
%view_49 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_6, [32, 1, 128]), kwargs = {})
%index_put_2 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_12, [None, None, %arg840_1], %view_49), kwargs = {})
%slice_13 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg101_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_4 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_13, %index_put_2, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_5 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg101_1, %slice_scatter_4, 0, 0, 9223372036854775807), kwargs = {})
%slice_16 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg102_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_17 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_16, 1, 0, 9223372036854775807), kwargs = {})
%view_50 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_7, [32, 1, 128]), kwargs = {})
%index_put_3 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_17, [None, None, %arg840_1], %view_50), kwargs = {})
%slice_18 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg102_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_6 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_18, %index_put_3, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_7 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg102_1, %slice_scatter_6, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_6 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg103_1, 0), kwargs = {})
%unsqueeze_7 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_6, 1), kwargs = {})
%index_3 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_7, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_9 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_5, 2), kwargs = {})
%expand_9 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_9, [1, 32, 1, 128, 128]), kwargs = {})
%clone_2 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_9,), kwargs = {memory_format: torch.contiguous_format})
%view_51 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_2, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_11 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_7, 2), kwargs = {})
%expand_11 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_11, [1, 32, 1, 128, 128]), kwargs = {})
%clone_3 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_11,), kwargs = {memory_format: torch.contiguous_format})
%view_52 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_3, [1, 32, 128, 128]), kwargs = {})
%zeros_like_1 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_3,), kwargs = {dtype: torch.float32})
%logical_not_1 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_3,), kwargs = {})
%masked_fill_1 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_1, %logical_not_1, -inf), kwargs = {})
%mul_28 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_5, 0.29730177875068026), kwargs = {})
%transpose_8 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_51, -2, -1), kwargs = {})
%mul_29 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_8, 0.29730177875068026), kwargs = {})
%expand_12 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_28, [1, 32, 1, 128]), kwargs = {})
%view_53 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_12, [32, 1, 128]), kwargs = {})
%expand_13 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_29, [1, 32, 128, 128]), kwargs = {})
%view_54 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_13, [32, 128, 128]), kwargs = {})
%bmm_2 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_53, %view_54), kwargs = {})
%view_55 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_2, [1, 32, 1, 128]), kwargs = {})
%add_10 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_55, %masked_fill_1), kwargs = {})
%_softmax_1 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_10, -1, False), kwargs = {})
%expand_14 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_1, [1, 32, 1, 128]), kwargs = {})
%view_56 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_14, [32, 1, 128]), kwargs = {})
%expand_15 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_52, [1, 32, 128, 128]), kwargs = {})
%view_57 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_15, [32, 128, 128]), kwargs = {})
%bmm_3 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_56, %view_57), kwargs = {})
%view_58 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_3, [1, 32, 1, 128]), kwargs = {})
%transpose_9 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_58, 1, 2), kwargs = {})
%view_59 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_9, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_10 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_59, torch.int8), kwargs = {})
%getitem_28 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_10, 0), kwargs = {})
%getitem_29 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_10, 1), kwargs = {})
%quantize_per_token_10 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_59, %getitem_28, %getitem_29, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_10 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_10, %getitem_28, %getitem_29, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_10 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg104_1, %arg105_1, %arg106_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_10 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_10,), kwargs = {})
%view_60 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_10, [1, 4096]), kwargs = {})
%mm_10 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_60, %t_10), kwargs = {})
%view_61 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_10, [1, 1, 4096]), kwargs = {})
%add_11 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_6, %view_61), kwargs = {})
%mul_30 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_11, %add_11), kwargs = {})
%mean_3 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_30, [-1], True), kwargs = {})
%add_12 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_3, 1e-06), kwargs = {})
%rsqrt_3 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_12,), kwargs = {})
%mul_31 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_11, %rsqrt_3), kwargs = {})
%mul_32 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_31, %arg4_1), kwargs = {})
%choose_qparams_per_token_asymmetric_11 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_32, torch.int8), kwargs = {})
%getitem_30 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_11, 0), kwargs = {})
%getitem_31 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_11, 1), kwargs = {})
%quantize_per_token_11 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_32, %getitem_30, %getitem_31, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_11 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_11, %getitem_30, %getitem_31, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_11 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg107_1, %arg108_1, %arg109_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_11 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_11,), kwargs = {})
%view_62 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_11, [1, 4096]), kwargs = {})
%mm_11 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_62, %t_11), kwargs = {})
%view_63 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_11, [1, 1, 11008]), kwargs = {})
%silu_1 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_63,), kwargs = {})
%choose_qparams_per_token_asymmetric_12 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_32, torch.int8), kwargs = {})
%getitem_32 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_12, 0), kwargs = {})
%getitem_33 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_12, 1), kwargs = {})
%quantize_per_token_12 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_32, %getitem_32, %getitem_33, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_12 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_12, %getitem_32, %getitem_33, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_12 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg110_1, %arg111_1, %arg112_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_12 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_12,), kwargs = {})
%view_64 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_12, [1, 4096]), kwargs = {})
%mm_12 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_64, %t_12), kwargs = {})
%view_65 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_12, [1, 1, 11008]), kwargs = {})
%mul_33 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_1, %view_65), kwargs = {})
%choose_qparams_per_token_asymmetric_13 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_33, torch.int8), kwargs = {})
%getitem_34 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_13, 0), kwargs = {})
%getitem_35 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_13, 1), kwargs = {})
%quantize_per_token_13 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_33, %getitem_34, %getitem_35, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_13 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_13, %getitem_34, %getitem_35, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_13 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg113_1, %arg114_1, %arg115_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_13 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_13,), kwargs = {})
%view_66 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_13, [1, 11008]), kwargs = {})
%mm_13 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_66, %t_13), kwargs = {})
%view_67 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_13, [1, 1, 4096]), kwargs = {})
%add_13 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_11, %view_67), kwargs = {})
%mul_34 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_13, %add_13), kwargs = {})
%mean_4 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_34, [-1], True), kwargs = {})
%add_14 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_4, 1e-06), kwargs = {})
%rsqrt_4 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_14,), kwargs = {})
%mul_35 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_13, %rsqrt_4), kwargs = {})
%mul_36 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_35, %arg5_1), kwargs = {})
%choose_qparams_per_token_asymmetric_14 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_36, torch.int8), kwargs = {})
%getitem_36 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_14, 0), kwargs = {})
%getitem_37 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_14, 1), kwargs = {})
%quantize_per_token_14 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_36, %getitem_36, %getitem_37, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_14 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_14, %getitem_36, %getitem_37, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_14 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg116_1, %arg117_1, %arg118_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_14 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_14,), kwargs = {})
%view_68 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_14, [1, 4096]), kwargs = {})
%mm_14 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_68, %t_14), kwargs = {})
%view_69 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_14, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_15 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_36, torch.int8), kwargs = {})
%getitem_38 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_15, 0), kwargs = {})
%getitem_39 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_15, 1), kwargs = {})
%quantize_per_token_15 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_36, %getitem_38, %getitem_39, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_15 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_15, %getitem_38, %getitem_39, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_15 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg119_1, %arg120_1, %arg121_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_15 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_15,), kwargs = {})
%view_70 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_15, [1, 4096]), kwargs = {})
%mm_15 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_70, %t_15), kwargs = {})
%view_71 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_15, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_16 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_36, torch.int8), kwargs = {})
%getitem_40 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_16, 0), kwargs = {})
%getitem_41 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_16, 1), kwargs = {})
%quantize_per_token_16 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_36, %getitem_40, %getitem_41, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_16 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_16, %getitem_40, %getitem_41, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_16 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg122_1, %arg123_1, %arg124_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_16 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_16,), kwargs = {})
%view_72 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_16, [1, 4096]), kwargs = {})
%mm_16 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_72, %t_16), kwargs = {})
%view_73 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_16, [1, 1, 4096]), kwargs = {})
%view_74 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_69, [1, 1, 32, 128]), kwargs = {})
%view_75 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_71, [1, 1, 32, 128]), kwargs = {})
%view_76 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_73, [1, 1, 32, 128]), kwargs = {})
%view_77 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_74, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_4 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_77, -1), kwargs = {})
%getitem_42 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_4, 0), kwargs = {})
%getitem_43 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_4, 1), kwargs = {})
%view_78 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_75, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_5 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_78, -1), kwargs = {})
%getitem_44 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_5, 0), kwargs = {})
%getitem_45 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_5, 1), kwargs = {})
%view_79 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_80 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_37 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_42, %view_79), kwargs = {})
%mul_38 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_43, %view_80), kwargs = {})
%sub_4 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_37, %mul_38), kwargs = {})
%mul_39 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_42, %view_80), kwargs = {})
%mul_40 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_43, %view_79), kwargs = {})
%add_15 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_39, %mul_40), kwargs = {})
%mul_41 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_44, %view_79), kwargs = {})
%mul_42 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_45, %view_80), kwargs = {})
%sub_5 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_41, %mul_42), kwargs = {})
%mul_43 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_44, %view_80), kwargs = {})
%mul_44 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_45, %view_79), kwargs = {})
%add_16 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_43, %mul_44), kwargs = {})
%stack_4 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_4, %add_15], -1), kwargs = {})
%view_81 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_4, [1, 1, 32, 128]), kwargs = {})
%stack_5 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_5, %add_16], -1), kwargs = {})
%view_82 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_5, [1, 1, 32, 128]), kwargs = {})
%transpose_10 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_81, 1, 2), kwargs = {})
%transpose_11 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_82, 1, 2), kwargs = {})
%transpose_12 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_76, 1, 2), kwargs = {})
%slice_21 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg125_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_22 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_21, 1, 0, 9223372036854775807), kwargs = {})
%view_83 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_11, [32, 1, 128]), kwargs = {})
%index_put_4 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_22, [None, None, %arg840_1], %view_83), kwargs = {})
%slice_23 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg125_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_8 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_23, %index_put_4, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_9 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg125_1, %slice_scatter_8, 0, 0, 9223372036854775807), kwargs = {})
%slice_26 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg126_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_27 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_26, 1, 0, 9223372036854775807), kwargs = {})
%view_84 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_12, [32, 1, 128]), kwargs = {})
%index_put_5 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_27, [None, None, %arg840_1], %view_84), kwargs = {})
%slice_28 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg126_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_10 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_28, %index_put_5, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_11 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg126_1, %slice_scatter_10, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_12 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg127_1, 0), kwargs = {})
%unsqueeze_13 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_12, 1), kwargs = {})
%index_4 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_13, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_15 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_9, 2), kwargs = {})
%expand_17 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_15, [1, 32, 1, 128, 128]), kwargs = {})
%clone_4 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_17,), kwargs = {memory_format: torch.contiguous_format})
%view_85 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_4, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_17 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_11, 2), kwargs = {})
%expand_19 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_17, [1, 32, 1, 128, 128]), kwargs = {})
%clone_5 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_19,), kwargs = {memory_format: torch.contiguous_format})
%view_86 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_5, [1, 32, 128, 128]), kwargs = {})
%zeros_like_2 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_4,), kwargs = {dtype: torch.float32})
%logical_not_2 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_4,), kwargs = {})
%masked_fill_2 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_2, %logical_not_2, -inf), kwargs = {})
%mul_45 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_10, 0.29730177875068026), kwargs = {})
%transpose_13 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_85, -2, -1), kwargs = {})
%mul_46 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_13, 0.29730177875068026), kwargs = {})
%expand_20 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_45, [1, 32, 1, 128]), kwargs = {})
%view_87 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_20, [32, 1, 128]), kwargs = {})
%expand_21 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_46, [1, 32, 128, 128]), kwargs = {})
%view_88 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_21, [32, 128, 128]), kwargs = {})
%bmm_4 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_87, %view_88), kwargs = {})
%view_89 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_4, [1, 32, 1, 128]), kwargs = {})
%add_17 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_89, %masked_fill_2), kwargs = {})
%_softmax_2 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_17, -1, False), kwargs = {})
%expand_22 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_2, [1, 32, 1, 128]), kwargs = {})
%view_90 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_22, [32, 1, 128]), kwargs = {})
%expand_23 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_86, [1, 32, 128, 128]), kwargs = {})
%view_91 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_23, [32, 128, 128]), kwargs = {})
%bmm_5 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_90, %view_91), kwargs = {})
%view_92 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_5, [1, 32, 1, 128]), kwargs = {})
%transpose_14 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_92, 1, 2), kwargs = {})
%view_93 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_14, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_17 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_93, torch.int8), kwargs = {})
%getitem_46 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_17, 0), kwargs = {})
%getitem_47 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_17, 1), kwargs = {})
%quantize_per_token_17 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_93, %getitem_46, %getitem_47, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_17 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_17, %getitem_46, %getitem_47, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_17 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg128_1, %arg129_1, %arg130_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_17 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_17,), kwargs = {})
%view_94 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_17, [1, 4096]), kwargs = {})
%mm_17 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_94, %t_17), kwargs = {})
%view_95 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_17, [1, 1, 4096]), kwargs = {})
%add_18 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_13, %view_95), kwargs = {})
%mul_47 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_18, %add_18), kwargs = {})
%mean_5 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_47, [-1], True), kwargs = {})
%add_19 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_5, 1e-06), kwargs = {})
%rsqrt_5 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_19,), kwargs = {})
%mul_48 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_18, %rsqrt_5), kwargs = {})
%mul_49 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_48, %arg6_1), kwargs = {})
%choose_qparams_per_token_asymmetric_18 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_49, torch.int8), kwargs = {})
%getitem_48 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_18, 0), kwargs = {})
%getitem_49 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_18, 1), kwargs = {})
%quantize_per_token_18 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_49, %getitem_48, %getitem_49, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_18 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_18, %getitem_48, %getitem_49, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_18 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg131_1, %arg132_1, %arg133_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_18 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_18,), kwargs = {})
%view_96 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_18, [1, 4096]), kwargs = {})
%mm_18 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_96, %t_18), kwargs = {})
%view_97 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_18, [1, 1, 11008]), kwargs = {})
%silu_2 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_97,), kwargs = {})
%choose_qparams_per_token_asymmetric_19 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_49, torch.int8), kwargs = {})
%getitem_50 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_19, 0), kwargs = {})
%getitem_51 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_19, 1), kwargs = {})
%quantize_per_token_19 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_49, %getitem_50, %getitem_51, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_19 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_19, %getitem_50, %getitem_51, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_19 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg134_1, %arg135_1, %arg136_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_19 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_19,), kwargs = {})
%view_98 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_19, [1, 4096]), kwargs = {})
%mm_19 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_98, %t_19), kwargs = {})
%view_99 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_19, [1, 1, 11008]), kwargs = {})
%mul_50 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_2, %view_99), kwargs = {})
%choose_qparams_per_token_asymmetric_20 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_50, torch.int8), kwargs = {})
%getitem_52 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_20, 0), kwargs = {})
%getitem_53 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_20, 1), kwargs = {})
%quantize_per_token_20 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_50, %getitem_52, %getitem_53, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_20 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_20, %getitem_52, %getitem_53, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_20 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg137_1, %arg138_1, %arg139_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_20 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_20,), kwargs = {})
%view_100 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_20, [1, 11008]), kwargs = {})
%mm_20 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_100, %t_20), kwargs = {})
%view_101 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_20, [1, 1, 4096]), kwargs = {})
%add_20 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_18, %view_101), kwargs = {})
%mul_51 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_20, %add_20), kwargs = {})
%mean_6 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_51, [-1], True), kwargs = {})
%add_21 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_6, 1e-06), kwargs = {})
%rsqrt_6 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_21,), kwargs = {})
%mul_52 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_20, %rsqrt_6), kwargs = {})
%mul_53 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_52, %arg7_1), kwargs = {})
%choose_qparams_per_token_asymmetric_21 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_53, torch.int8), kwargs = {})
%getitem_54 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_21, 0), kwargs = {})
%getitem_55 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_21, 1), kwargs = {})
%quantize_per_token_21 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_53, %getitem_54, %getitem_55, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_21 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_21, %getitem_54, %getitem_55, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_21 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg140_1, %arg141_1, %arg142_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_21 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_21,), kwargs = {})
%view_102 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_21, [1, 4096]), kwargs = {})
%mm_21 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_102, %t_21), kwargs = {})
%view_103 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_21, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_22 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_53, torch.int8), kwargs = {})
%getitem_56 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_22, 0), kwargs = {})
%getitem_57 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_22, 1), kwargs = {})
%quantize_per_token_22 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_53, %getitem_56, %getitem_57, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_22 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_22, %getitem_56, %getitem_57, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_22 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg143_1, %arg144_1, %arg145_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_22 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_22,), kwargs = {})
%view_104 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_22, [1, 4096]), kwargs = {})
%mm_22 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_104, %t_22), kwargs = {})
%view_105 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_22, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_23 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_53, torch.int8), kwargs = {})
%getitem_58 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_23, 0), kwargs = {})
%getitem_59 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_23, 1), kwargs = {})
%quantize_per_token_23 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_53, %getitem_58, %getitem_59, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_23 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_23, %getitem_58, %getitem_59, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_23 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg146_1, %arg147_1, %arg148_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_23 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_23,), kwargs = {})
%view_106 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_23, [1, 4096]), kwargs = {})
%mm_23 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_106, %t_23), kwargs = {})
%view_107 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_23, [1, 1, 4096]), kwargs = {})
%view_108 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_103, [1, 1, 32, 128]), kwargs = {})
%view_109 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_105, [1, 1, 32, 128]), kwargs = {})
%view_110 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_107, [1, 1, 32, 128]), kwargs = {})
%view_111 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_108, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_6 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_111, -1), kwargs = {})
%getitem_60 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_6, 0), kwargs = {})
%getitem_61 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_6, 1), kwargs = {})
%view_112 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_109, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_7 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_112, -1), kwargs = {})
%getitem_62 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_7, 0), kwargs = {})
%getitem_63 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_7, 1), kwargs = {})
%view_113 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_114 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_54 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_60, %view_113), kwargs = {})
%mul_55 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_61, %view_114), kwargs = {})
%sub_6 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_54, %mul_55), kwargs = {})
%mul_56 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_60, %view_114), kwargs = {})
%mul_57 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_61, %view_113), kwargs = {})
%add_22 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_56, %mul_57), kwargs = {})
%mul_58 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_62, %view_113), kwargs = {})
%mul_59 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_63, %view_114), kwargs = {})
%sub_7 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_58, %mul_59), kwargs = {})
%mul_60 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_62, %view_114), kwargs = {})
%mul_61 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_63, %view_113), kwargs = {})
%add_23 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_60, %mul_61), kwargs = {})
%stack_6 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_6, %add_22], -1), kwargs = {})
%view_115 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_6, [1, 1, 32, 128]), kwargs = {})
%stack_7 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_7, %add_23], -1), kwargs = {})
%view_116 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_7, [1, 1, 32, 128]), kwargs = {})
%transpose_15 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_115, 1, 2), kwargs = {})
%transpose_16 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_116, 1, 2), kwargs = {})
%transpose_17 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_110, 1, 2), kwargs = {})
%slice_31 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg149_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_32 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_31, 1, 0, 9223372036854775807), kwargs = {})
%view_117 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_16, [32, 1, 128]), kwargs = {})
%index_put_6 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_32, [None, None, %arg840_1], %view_117), kwargs = {})
%slice_33 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg149_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_12 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_33, %index_put_6, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_13 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg149_1, %slice_scatter_12, 0, 0, 9223372036854775807), kwargs = {})
%slice_36 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg150_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_37 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_36, 1, 0, 9223372036854775807), kwargs = {})
%view_118 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_17, [32, 1, 128]), kwargs = {})
%index_put_7 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_37, [None, None, %arg840_1], %view_118), kwargs = {})
%slice_38 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg150_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_14 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_38, %index_put_7, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_15 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg150_1, %slice_scatter_14, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_18 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg151_1, 0), kwargs = {})
%unsqueeze_19 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_18, 1), kwargs = {})
%index_5 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_19, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_21 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_13, 2), kwargs = {})
%expand_25 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_21, [1, 32, 1, 128, 128]), kwargs = {})
%clone_6 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_25,), kwargs = {memory_format: torch.contiguous_format})
%view_119 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_6, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_23 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_15, 2), kwargs = {})
%expand_27 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_23, [1, 32, 1, 128, 128]), kwargs = {})
%clone_7 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_27,), kwargs = {memory_format: torch.contiguous_format})
%view_120 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_7, [1, 32, 128, 128]), kwargs = {})
%zeros_like_3 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_5,), kwargs = {dtype: torch.float32})
%logical_not_3 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_5,), kwargs = {})
%masked_fill_3 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_3, %logical_not_3, -inf), kwargs = {})
%mul_62 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_15, 0.29730177875068026), kwargs = {})
%transpose_18 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_119, -2, -1), kwargs = {})
%mul_63 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_18, 0.29730177875068026), kwargs = {})
%expand_28 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_62, [1, 32, 1, 128]), kwargs = {})
%view_121 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_28, [32, 1, 128]), kwargs = {})
%expand_29 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_63, [1, 32, 128, 128]), kwargs = {})
%view_122 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_29, [32, 128, 128]), kwargs = {})
%bmm_6 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_121, %view_122), kwargs = {})
%view_123 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_6, [1, 32, 1, 128]), kwargs = {})
%add_24 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_123, %masked_fill_3), kwargs = {})
%_softmax_3 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_24, -1, False), kwargs = {})
%expand_30 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_3, [1, 32, 1, 128]), kwargs = {})
%view_124 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_30, [32, 1, 128]), kwargs = {})
%expand_31 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_120, [1, 32, 128, 128]), kwargs = {})
%view_125 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_31, [32, 128, 128]), kwargs = {})
%bmm_7 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_124, %view_125), kwargs = {})
%view_126 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_7, [1, 32, 1, 128]), kwargs = {})
%transpose_19 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_126, 1, 2), kwargs = {})
%view_127 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_19, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_24 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_127, torch.int8), kwargs = {})
%getitem_64 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_24, 0), kwargs = {})
%getitem_65 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_24, 1), kwargs = {})
%quantize_per_token_24 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_127, %getitem_64, %getitem_65, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_24 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_24, %getitem_64, %getitem_65, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_24 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg152_1, %arg153_1, %arg154_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_24 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_24,), kwargs = {})
%view_128 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_24, [1, 4096]), kwargs = {})
%mm_24 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_128, %t_24), kwargs = {})
%view_129 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_24, [1, 1, 4096]), kwargs = {})
%add_25 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_20, %view_129), kwargs = {})
%mul_64 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_25, %add_25), kwargs = {})
%mean_7 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_64, [-1], True), kwargs = {})
%add_26 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_7, 1e-06), kwargs = {})
%rsqrt_7 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_26,), kwargs = {})
%mul_65 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_25, %rsqrt_7), kwargs = {})
%mul_66 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_65, %arg8_1), kwargs = {})
%choose_qparams_per_token_asymmetric_25 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_66, torch.int8), kwargs = {})
%getitem_66 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_25, 0), kwargs = {})
%getitem_67 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_25, 1), kwargs = {})
%quantize_per_token_25 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_66, %getitem_66, %getitem_67, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_25 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_25, %getitem_66, %getitem_67, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_25 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg155_1, %arg156_1, %arg157_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_25 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_25,), kwargs = {})
%view_130 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_25, [1, 4096]), kwargs = {})
%mm_25 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_130, %t_25), kwargs = {})
%view_131 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_25, [1, 1, 11008]), kwargs = {})
%silu_3 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_131,), kwargs = {})
%choose_qparams_per_token_asymmetric_26 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_66, torch.int8), kwargs = {})
%getitem_68 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_26, 0), kwargs = {})
%getitem_69 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_26, 1), kwargs = {})
%quantize_per_token_26 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_66, %getitem_68, %getitem_69, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_26 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_26, %getitem_68, %getitem_69, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_26 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg158_1, %arg159_1, %arg160_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_26 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_26,), kwargs = {})
%view_132 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_26, [1, 4096]), kwargs = {})
%mm_26 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_132, %t_26), kwargs = {})
%view_133 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_26, [1, 1, 11008]), kwargs = {})
%mul_67 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_3, %view_133), kwargs = {})
%choose_qparams_per_token_asymmetric_27 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_67, torch.int8), kwargs = {})
%getitem_70 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_27, 0), kwargs = {})
%getitem_71 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_27, 1), kwargs = {})
%quantize_per_token_27 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_67, %getitem_70, %getitem_71, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_27 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_27, %getitem_70, %getitem_71, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_27 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg161_1, %arg162_1, %arg163_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_27 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_27,), kwargs = {})
%view_134 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_27, [1, 11008]), kwargs = {})
%mm_27 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_134, %t_27), kwargs = {})
%view_135 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_27, [1, 1, 4096]), kwargs = {})
%add_27 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_25, %view_135), kwargs = {})
%mul_68 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_27, %add_27), kwargs = {})
%mean_8 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_68, [-1], True), kwargs = {})
%add_28 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_8, 1e-06), kwargs = {})
%rsqrt_8 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_28,), kwargs = {})
%mul_69 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_27, %rsqrt_8), kwargs = {})
%mul_70 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_69, %arg9_1), kwargs = {})
%choose_qparams_per_token_asymmetric_28 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_70, torch.int8), kwargs = {})
%getitem_72 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_28, 0), kwargs = {})
%getitem_73 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_28, 1), kwargs = {})
%quantize_per_token_28 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_70, %getitem_72, %getitem_73, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_28 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_28, %getitem_72, %getitem_73, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_28 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg164_1, %arg165_1, %arg166_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_28 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_28,), kwargs = {})
%view_136 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_28, [1, 4096]), kwargs = {})
%mm_28 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_136, %t_28), kwargs = {})
%view_137 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_28, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_29 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_70, torch.int8), kwargs = {})
%getitem_74 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_29, 0), kwargs = {})
%getitem_75 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_29, 1), kwargs = {})
%quantize_per_token_29 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_70, %getitem_74, %getitem_75, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_29 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_29, %getitem_74, %getitem_75, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_29 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg167_1, %arg168_1, %arg169_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_29 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_29,), kwargs = {})
%view_138 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_29, [1, 4096]), kwargs = {})
%mm_29 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_138, %t_29), kwargs = {})
%view_139 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_29, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_30 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_70, torch.int8), kwargs = {})
%getitem_76 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_30, 0), kwargs = {})
%getitem_77 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_30, 1), kwargs = {})
%quantize_per_token_30 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_70, %getitem_76, %getitem_77, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_30 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_30, %getitem_76, %getitem_77, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_30 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg170_1, %arg171_1, %arg172_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_30 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_30,), kwargs = {})
%view_140 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_30, [1, 4096]), kwargs = {})
%mm_30 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_140, %t_30), kwargs = {})
%view_141 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_30, [1, 1, 4096]), kwargs = {})
%view_142 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_137, [1, 1, 32, 128]), kwargs = {})
%view_143 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_139, [1, 1, 32, 128]), kwargs = {})
%view_144 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_141, [1, 1, 32, 128]), kwargs = {})
%view_145 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_142, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_8 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_145, -1), kwargs = {})
%getitem_78 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_8, 0), kwargs = {})
%getitem_79 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_8, 1), kwargs = {})
%view_146 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_143, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_9 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_146, -1), kwargs = {})
%getitem_80 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_9, 0), kwargs = {})
%getitem_81 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_9, 1), kwargs = {})
%view_147 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_148 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_71 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_78, %view_147), kwargs = {})
%mul_72 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_79, %view_148), kwargs = {})
%sub_8 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_71, %mul_72), kwargs = {})
%mul_73 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_78, %view_148), kwargs = {})
%mul_74 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_79, %view_147), kwargs = {})
%add_29 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_73, %mul_74), kwargs = {})
%mul_75 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_80, %view_147), kwargs = {})
%mul_76 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_81, %view_148), kwargs = {})
%sub_9 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_75, %mul_76), kwargs = {})
%mul_77 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_80, %view_148), kwargs = {})
%mul_78 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_81, %view_147), kwargs = {})
%add_30 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_77, %mul_78), kwargs = {})
%stack_8 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_8, %add_29], -1), kwargs = {})
%view_149 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_8, [1, 1, 32, 128]), kwargs = {})
%stack_9 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_9, %add_30], -1), kwargs = {})
%view_150 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_9, [1, 1, 32, 128]), kwargs = {})
%transpose_20 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_149, 1, 2), kwargs = {})
%transpose_21 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_150, 1, 2), kwargs = {})
%transpose_22 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_144, 1, 2), kwargs = {})
%slice_41 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg173_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_42 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_41, 1, 0, 9223372036854775807), kwargs = {})
%view_151 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_21, [32, 1, 128]), kwargs = {})
%index_put_8 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_42, [None, None, %arg840_1], %view_151), kwargs = {})
%slice_43 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg173_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_16 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_43, %index_put_8, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_17 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg173_1, %slice_scatter_16, 0, 0, 9223372036854775807), kwargs = {})
%slice_46 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg174_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_47 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_46, 1, 0, 9223372036854775807), kwargs = {})
%view_152 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_22, [32, 1, 128]), kwargs = {})
%index_put_9 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_47, [None, None, %arg840_1], %view_152), kwargs = {})
%slice_48 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg174_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_18 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_48, %index_put_9, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_19 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg174_1, %slice_scatter_18, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_24 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg175_1, 0), kwargs = {})
%unsqueeze_25 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_24, 1), kwargs = {})
%index_6 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_25, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_27 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_17, 2), kwargs = {})
%expand_33 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_27, [1, 32, 1, 128, 128]), kwargs = {})
%clone_8 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_33,), kwargs = {memory_format: torch.contiguous_format})
%view_153 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_8, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_29 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_19, 2), kwargs = {})
%expand_35 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_29, [1, 32, 1, 128, 128]), kwargs = {})
%clone_9 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_35,), kwargs = {memory_format: torch.contiguous_format})
%view_154 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_9, [1, 32, 128, 128]), kwargs = {})
%zeros_like_4 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_6,), kwargs = {dtype: torch.float32})
%logical_not_4 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_6,), kwargs = {})
%masked_fill_4 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_4, %logical_not_4, -inf), kwargs = {})
%mul_79 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_20, 0.29730177875068026), kwargs = {})
%transpose_23 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_153, -2, -1), kwargs = {})
%mul_80 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_23, 0.29730177875068026), kwargs = {})
%expand_36 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_79, [1, 32, 1, 128]), kwargs = {})
%view_155 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_36, [32, 1, 128]), kwargs = {})
%expand_37 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_80, [1, 32, 128, 128]), kwargs = {})
%view_156 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_37, [32, 128, 128]), kwargs = {})
%bmm_8 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_155, %view_156), kwargs = {})
%view_157 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_8, [1, 32, 1, 128]), kwargs = {})
%add_31 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_157, %masked_fill_4), kwargs = {})
%_softmax_4 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_31, -1, False), kwargs = {})
%expand_38 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_4, [1, 32, 1, 128]), kwargs = {})
%view_158 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_38, [32, 1, 128]), kwargs = {})
%expand_39 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_154, [1, 32, 128, 128]), kwargs = {})
%view_159 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_39, [32, 128, 128]), kwargs = {})
%bmm_9 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_158, %view_159), kwargs = {})
%view_160 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_9, [1, 32, 1, 128]), kwargs = {})
%transpose_24 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_160, 1, 2), kwargs = {})
%view_161 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_24, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_31 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_161, torch.int8), kwargs = {})
%getitem_82 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_31, 0), kwargs = {})
%getitem_83 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_31, 1), kwargs = {})
%quantize_per_token_31 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_161, %getitem_82, %getitem_83, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_31 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_31, %getitem_82, %getitem_83, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_31 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg176_1, %arg177_1, %arg178_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_31 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_31,), kwargs = {})
%view_162 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_31, [1, 4096]), kwargs = {})
%mm_31 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_162, %t_31), kwargs = {})
%view_163 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_31, [1, 1, 4096]), kwargs = {})
%add_32 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_27, %view_163), kwargs = {})
%mul_81 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_32, %add_32), kwargs = {})
%mean_9 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_81, [-1], True), kwargs = {})
%add_33 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_9, 1e-06), kwargs = {})
%rsqrt_9 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_33,), kwargs = {})
%mul_82 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_32, %rsqrt_9), kwargs = {})
%mul_83 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_82, %arg10_1), kwargs = {})
%choose_qparams_per_token_asymmetric_32 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_83, torch.int8), kwargs = {})
%getitem_84 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_32, 0), kwargs = {})
%getitem_85 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_32, 1), kwargs = {})
%quantize_per_token_32 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_83, %getitem_84, %getitem_85, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_32 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_32, %getitem_84, %getitem_85, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_32 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg179_1, %arg180_1, %arg181_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_32 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_32,), kwargs = {})
%view_164 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_32, [1, 4096]), kwargs = {})
%mm_32 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_164, %t_32), kwargs = {})
%view_165 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_32, [1, 1, 11008]), kwargs = {})
%silu_4 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_165,), kwargs = {})
%choose_qparams_per_token_asymmetric_33 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_83, torch.int8), kwargs = {})
%getitem_86 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_33, 0), kwargs = {})
%getitem_87 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_33, 1), kwargs = {})
%quantize_per_token_33 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_83, %getitem_86, %getitem_87, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_33 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_33, %getitem_86, %getitem_87, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_33 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg182_1, %arg183_1, %arg184_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_33 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_33,), kwargs = {})
%view_166 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_33, [1, 4096]), kwargs = {})
%mm_33 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_166, %t_33), kwargs = {})
%view_167 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_33, [1, 1, 11008]), kwargs = {})
%mul_84 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_4, %view_167), kwargs = {})
%choose_qparams_per_token_asymmetric_34 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_84, torch.int8), kwargs = {})
%getitem_88 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_34, 0), kwargs = {})
%getitem_89 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_34, 1), kwargs = {})
%quantize_per_token_34 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_84, %getitem_88, %getitem_89, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_34 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_34, %getitem_88, %getitem_89, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_34 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg185_1, %arg186_1, %arg187_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_34 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_34,), kwargs = {})
%view_168 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_34, [1, 11008]), kwargs = {})
%mm_34 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_168, %t_34), kwargs = {})
%view_169 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_34, [1, 1, 4096]), kwargs = {})
%add_34 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_32, %view_169), kwargs = {})
%mul_85 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_34, %add_34), kwargs = {})
%mean_10 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_85, [-1], True), kwargs = {})
%add_35 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_10, 1e-06), kwargs = {})
%rsqrt_10 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_35,), kwargs = {})
%mul_86 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_34, %rsqrt_10), kwargs = {})
%mul_87 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_86, %arg11_1), kwargs = {})
%choose_qparams_per_token_asymmetric_35 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_87, torch.int8), kwargs = {})
%getitem_90 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_35, 0), kwargs = {})
%getitem_91 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_35, 1), kwargs = {})
%quantize_per_token_35 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_87, %getitem_90, %getitem_91, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_35 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_35, %getitem_90, %getitem_91, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_35 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg188_1, %arg189_1, %arg190_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_35 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_35,), kwargs = {})
%view_170 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_35, [1, 4096]), kwargs = {})
%mm_35 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_170, %t_35), kwargs = {})
%view_171 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_35, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_36 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_87, torch.int8), kwargs = {})
%getitem_92 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_36, 0), kwargs = {})
%getitem_93 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_36, 1), kwargs = {})
%quantize_per_token_36 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_87, %getitem_92, %getitem_93, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_36 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_36, %getitem_92, %getitem_93, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_36 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg191_1, %arg192_1, %arg193_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_36 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_36,), kwargs = {})
%view_172 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_36, [1, 4096]), kwargs = {})
%mm_36 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_172, %t_36), kwargs = {})
%view_173 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_36, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_37 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_87, torch.int8), kwargs = {})
%getitem_94 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_37, 0), kwargs = {})
%getitem_95 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_37, 1), kwargs = {})
%quantize_per_token_37 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_87, %getitem_94, %getitem_95, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_37 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_37, %getitem_94, %getitem_95, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_37 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg194_1, %arg195_1, %arg196_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_37 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_37,), kwargs = {})
%view_174 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_37, [1, 4096]), kwargs = {})
%mm_37 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_174, %t_37), kwargs = {})
%view_175 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_37, [1, 1, 4096]), kwargs = {})
%view_176 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_171, [1, 1, 32, 128]), kwargs = {})
%view_177 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_173, [1, 1, 32, 128]), kwargs = {})
%view_178 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_175, [1, 1, 32, 128]), kwargs = {})
%view_179 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_176, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_10 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_179, -1), kwargs = {})
%getitem_96 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_10, 0), kwargs = {})
%getitem_97 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_10, 1), kwargs = {})
%view_180 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_177, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_11 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_180, -1), kwargs = {})
%getitem_98 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_11, 0), kwargs = {})
%getitem_99 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_11, 1), kwargs = {})
%view_181 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_182 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_88 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_96, %view_181), kwargs = {})
%mul_89 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_97, %view_182), kwargs = {})
%sub_10 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_88, %mul_89), kwargs = {})
%mul_90 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_96, %view_182), kwargs = {})
%mul_91 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_97, %view_181), kwargs = {})
%add_36 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_90, %mul_91), kwargs = {})
%mul_92 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_98, %view_181), kwargs = {})
%mul_93 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_99, %view_182), kwargs = {})
%sub_11 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_92, %mul_93), kwargs = {})
%mul_94 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_98, %view_182), kwargs = {})
%mul_95 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_99, %view_181), kwargs = {})
%add_37 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_94, %mul_95), kwargs = {})
%stack_10 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_10, %add_36], -1), kwargs = {})
%view_183 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_10, [1, 1, 32, 128]), kwargs = {})
%stack_11 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_11, %add_37], -1), kwargs = {})
%view_184 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_11, [1, 1, 32, 128]), kwargs = {})
%transpose_25 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_183, 1, 2), kwargs = {})
%transpose_26 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_184, 1, 2), kwargs = {})
%transpose_27 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_178, 1, 2), kwargs = {})
%slice_51 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg197_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_52 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_51, 1, 0, 9223372036854775807), kwargs = {})
%view_185 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_26, [32, 1, 128]), kwargs = {})
%index_put_10 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_52, [None, None, %arg840_1], %view_185), kwargs = {})
%slice_53 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg197_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_20 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_53, %index_put_10, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_21 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg197_1, %slice_scatter_20, 0, 0, 9223372036854775807), kwargs = {})
%slice_56 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg198_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_57 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_56, 1, 0, 9223372036854775807), kwargs = {})
%view_186 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_27, [32, 1, 128]), kwargs = {})
%index_put_11 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_57, [None, None, %arg840_1], %view_186), kwargs = {})
%slice_58 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg198_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_22 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_58, %index_put_11, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_23 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg198_1, %slice_scatter_22, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_30 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg199_1, 0), kwargs = {})
%unsqueeze_31 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_30, 1), kwargs = {})
%index_7 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_31, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_33 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_21, 2), kwargs = {})
%expand_41 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_33, [1, 32, 1, 128, 128]), kwargs = {})
%clone_10 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_41,), kwargs = {memory_format: torch.contiguous_format})
%view_187 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_10, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_35 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_23, 2), kwargs = {})
%expand_43 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_35, [1, 32, 1, 128, 128]), kwargs = {})
%clone_11 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_43,), kwargs = {memory_format: torch.contiguous_format})
%view_188 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_11, [1, 32, 128, 128]), kwargs = {})
%zeros_like_5 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_7,), kwargs = {dtype: torch.float32})
%logical_not_5 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_7,), kwargs = {})
%masked_fill_5 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_5, %logical_not_5, -inf), kwargs = {})
%mul_96 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_25, 0.29730177875068026), kwargs = {})
%transpose_28 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_187, -2, -1), kwargs = {})
%mul_97 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_28, 0.29730177875068026), kwargs = {})
%expand_44 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_96, [1, 32, 1, 128]), kwargs = {})
%view_189 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_44, [32, 1, 128]), kwargs = {})
%expand_45 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_97, [1, 32, 128, 128]), kwargs = {})
%view_190 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_45, [32, 128, 128]), kwargs = {})
%bmm_10 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_189, %view_190), kwargs = {})
%view_191 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_10, [1, 32, 1, 128]), kwargs = {})
%add_38 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_191, %masked_fill_5), kwargs = {})
%_softmax_5 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_38, -1, False), kwargs = {})
%expand_46 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_5, [1, 32, 1, 128]), kwargs = {})
%view_192 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_46, [32, 1, 128]), kwargs = {})
%expand_47 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_188, [1, 32, 128, 128]), kwargs = {})
%view_193 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_47, [32, 128, 128]), kwargs = {})
%bmm_11 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_192, %view_193), kwargs = {})
%view_194 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_11, [1, 32, 1, 128]), kwargs = {})
%transpose_29 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_194, 1, 2), kwargs = {})
%view_195 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_29, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_38 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_195, torch.int8), kwargs = {})
%getitem_100 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_38, 0), kwargs = {})
%getitem_101 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_38, 1), kwargs = {})
%quantize_per_token_38 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_195, %getitem_100, %getitem_101, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_38 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_38, %getitem_100, %getitem_101, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_38 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg200_1, %arg201_1, %arg202_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_38 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_38,), kwargs = {})
%view_196 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_38, [1, 4096]), kwargs = {})
%mm_38 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_196, %t_38), kwargs = {})
%view_197 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_38, [1, 1, 4096]), kwargs = {})
%add_39 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_34, %view_197), kwargs = {})
%mul_98 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_39, %add_39), kwargs = {})
%mean_11 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_98, [-1], True), kwargs = {})
%add_40 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_11, 1e-06), kwargs = {})
%rsqrt_11 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_40,), kwargs = {})
%mul_99 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_39, %rsqrt_11), kwargs = {})
%mul_100 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_99, %arg12_1), kwargs = {})
%choose_qparams_per_token_asymmetric_39 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_100, torch.int8), kwargs = {})
%getitem_102 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_39, 0), kwargs = {})
%getitem_103 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_39, 1), kwargs = {})
%quantize_per_token_39 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_100, %getitem_102, %getitem_103, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_39 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_39, %getitem_102, %getitem_103, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_39 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg203_1, %arg204_1, %arg205_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_39 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_39,), kwargs = {})
%view_198 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_39, [1, 4096]), kwargs = {})
%mm_39 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_198, %t_39), kwargs = {})
%view_199 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_39, [1, 1, 11008]), kwargs = {})
%silu_5 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_199,), kwargs = {})
%choose_qparams_per_token_asymmetric_40 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_100, torch.int8), kwargs = {})
%getitem_104 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_40, 0), kwargs = {})
%getitem_105 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_40, 1), kwargs = {})
%quantize_per_token_40 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_100, %getitem_104, %getitem_105, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_40 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_40, %getitem_104, %getitem_105, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_40 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg206_1, %arg207_1, %arg208_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_40 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_40,), kwargs = {})
%view_200 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_40, [1, 4096]), kwargs = {})
%mm_40 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_200, %t_40), kwargs = {})
%view_201 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_40, [1, 1, 11008]), kwargs = {})
%mul_101 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_5, %view_201), kwargs = {})
%choose_qparams_per_token_asymmetric_41 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_101, torch.int8), kwargs = {})
%getitem_106 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_41, 0), kwargs = {})
%getitem_107 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_41, 1), kwargs = {})
%quantize_per_token_41 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_101, %getitem_106, %getitem_107, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_41 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_41, %getitem_106, %getitem_107, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_41 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg209_1, %arg210_1, %arg211_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_41 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_41,), kwargs = {})
%view_202 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_41, [1, 11008]), kwargs = {})
%mm_41 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_202, %t_41), kwargs = {})
%view_203 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_41, [1, 1, 4096]), kwargs = {})
%add_41 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_39, %view_203), kwargs = {})
%mul_102 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_41, %add_41), kwargs = {})
%mean_12 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_102, [-1], True), kwargs = {})
%add_42 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_12, 1e-06), kwargs = {})
%rsqrt_12 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_42,), kwargs = {})
%mul_103 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_41, %rsqrt_12), kwargs = {})
%mul_104 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_103, %arg13_1), kwargs = {})
%choose_qparams_per_token_asymmetric_42 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_104, torch.int8), kwargs = {})
%getitem_108 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_42, 0), kwargs = {})
%getitem_109 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_42, 1), kwargs = {})
%quantize_per_token_42 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_104, %getitem_108, %getitem_109, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_42 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_42, %getitem_108, %getitem_109, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_42 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg212_1, %arg213_1, %arg214_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_42 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_42,), kwargs = {})
%view_204 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_42, [1, 4096]), kwargs = {})
%mm_42 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_204, %t_42), kwargs = {})
%view_205 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_42, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_43 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_104, torch.int8), kwargs = {})
%getitem_110 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_43, 0), kwargs = {})
%getitem_111 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_43, 1), kwargs = {})
%quantize_per_token_43 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_104, %getitem_110, %getitem_111, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_43 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_43, %getitem_110, %getitem_111, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_43 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg215_1, %arg216_1, %arg217_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_43 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_43,), kwargs = {})
%view_206 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_43, [1, 4096]), kwargs = {})
%mm_43 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_206, %t_43), kwargs = {})
%view_207 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_43, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_44 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_104, torch.int8), kwargs = {})
%getitem_112 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_44, 0), kwargs = {})
%getitem_113 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_44, 1), kwargs = {})
%quantize_per_token_44 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_104, %getitem_112, %getitem_113, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_44 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_44, %getitem_112, %getitem_113, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_44 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg218_1, %arg219_1, %arg220_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_44 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_44,), kwargs = {})
%view_208 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_44, [1, 4096]), kwargs = {})
%mm_44 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_208, %t_44), kwargs = {})
%view_209 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_44, [1, 1, 4096]), kwargs = {})
%view_210 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_205, [1, 1, 32, 128]), kwargs = {})
%view_211 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_207, [1, 1, 32, 128]), kwargs = {})
%view_212 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_209, [1, 1, 32, 128]), kwargs = {})
%view_213 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_210, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_12 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_213, -1), kwargs = {})
%getitem_114 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_12, 0), kwargs = {})
%getitem_115 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_12, 1), kwargs = {})
%view_214 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_211, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_13 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_214, -1), kwargs = {})
%getitem_116 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_13, 0), kwargs = {})
%getitem_117 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_13, 1), kwargs = {})
%view_215 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_216 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_105 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_114, %view_215), kwargs = {})
%mul_106 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_115, %view_216), kwargs = {})
%sub_12 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_105, %mul_106), kwargs = {})
%mul_107 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_114, %view_216), kwargs = {})
%mul_108 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_115, %view_215), kwargs = {})
%add_43 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_107, %mul_108), kwargs = {})
%mul_109 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_116, %view_215), kwargs = {})
%mul_110 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_117, %view_216), kwargs = {})
%sub_13 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_109, %mul_110), kwargs = {})
%mul_111 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_116, %view_216), kwargs = {})
%mul_112 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_117, %view_215), kwargs = {})
%add_44 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_111, %mul_112), kwargs = {})
%stack_12 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_12, %add_43], -1), kwargs = {})
%view_217 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_12, [1, 1, 32, 128]), kwargs = {})
%stack_13 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_13, %add_44], -1), kwargs = {})
%view_218 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_13, [1, 1, 32, 128]), kwargs = {})
%transpose_30 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_217, 1, 2), kwargs = {})
%transpose_31 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_218, 1, 2), kwargs = {})
%transpose_32 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_212, 1, 2), kwargs = {})
%slice_61 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg221_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_62 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_61, 1, 0, 9223372036854775807), kwargs = {})
%view_219 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_31, [32, 1, 128]), kwargs = {})
%index_put_12 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_62, [None, None, %arg840_1], %view_219), kwargs = {})
%slice_63 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg221_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_24 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_63, %index_put_12, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_25 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg221_1, %slice_scatter_24, 0, 0, 9223372036854775807), kwargs = {})
%slice_66 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg222_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_67 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_66, 1, 0, 9223372036854775807), kwargs = {})
%view_220 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_32, [32, 1, 128]), kwargs = {})
%index_put_13 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_67, [None, None, %arg840_1], %view_220), kwargs = {})
%slice_68 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg222_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_26 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_68, %index_put_13, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_27 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg222_1, %slice_scatter_26, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_36 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg223_1, 0), kwargs = {})
%unsqueeze_37 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_36, 1), kwargs = {})
%index_8 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_37, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_39 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_25, 2), kwargs = {})
%expand_49 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_39, [1, 32, 1, 128, 128]), kwargs = {})
%clone_12 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_49,), kwargs = {memory_format: torch.contiguous_format})
%view_221 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_12, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_41 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_27, 2), kwargs = {})
%expand_51 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_41, [1, 32, 1, 128, 128]), kwargs = {})
%clone_13 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_51,), kwargs = {memory_format: torch.contiguous_format})
%view_222 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_13, [1, 32, 128, 128]), kwargs = {})
%zeros_like_6 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_8,), kwargs = {dtype: torch.float32})
%logical_not_6 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_8,), kwargs = {})
%masked_fill_6 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_6, %logical_not_6, -inf), kwargs = {})
%mul_113 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_30, 0.29730177875068026), kwargs = {})
%transpose_33 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_221, -2, -1), kwargs = {})
%mul_114 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_33, 0.29730177875068026), kwargs = {})
%expand_52 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_113, [1, 32, 1, 128]), kwargs = {})
%view_223 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_52, [32, 1, 128]), kwargs = {})
%expand_53 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_114, [1, 32, 128, 128]), kwargs = {})
%view_224 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_53, [32, 128, 128]), kwargs = {})
%bmm_12 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_223, %view_224), kwargs = {})
%view_225 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_12, [1, 32, 1, 128]), kwargs = {})
%add_45 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_225, %masked_fill_6), kwargs = {})
%_softmax_6 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_45, -1, False), kwargs = {})
%expand_54 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_6, [1, 32, 1, 128]), kwargs = {})
%view_226 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_54, [32, 1, 128]), kwargs = {})
%expand_55 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_222, [1, 32, 128, 128]), kwargs = {})
%view_227 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_55, [32, 128, 128]), kwargs = {})
%bmm_13 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_226, %view_227), kwargs = {})
%view_228 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_13, [1, 32, 1, 128]), kwargs = {})
%transpose_34 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_228, 1, 2), kwargs = {})
%view_229 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_34, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_45 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_229, torch.int8), kwargs = {})
%getitem_118 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_45, 0), kwargs = {})
%getitem_119 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_45, 1), kwargs = {})
%quantize_per_token_45 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_229, %getitem_118, %getitem_119, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_45 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_45, %getitem_118, %getitem_119, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_45 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg224_1, %arg225_1, %arg226_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_45 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_45,), kwargs = {})
%view_230 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_45, [1, 4096]), kwargs = {})
%mm_45 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_230, %t_45), kwargs = {})
%view_231 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_45, [1, 1, 4096]), kwargs = {})
%add_46 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_41, %view_231), kwargs = {})
%mul_115 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_46, %add_46), kwargs = {})
%mean_13 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_115, [-1], True), kwargs = {})
%add_47 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_13, 1e-06), kwargs = {})
%rsqrt_13 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_47,), kwargs = {})
%mul_116 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_46, %rsqrt_13), kwargs = {})
%mul_117 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_116, %arg14_1), kwargs = {})
%choose_qparams_per_token_asymmetric_46 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_117, torch.int8), kwargs = {})
%getitem_120 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_46, 0), kwargs = {})
%getitem_121 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_46, 1), kwargs = {})
%quantize_per_token_46 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_117, %getitem_120, %getitem_121, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_46 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_46, %getitem_120, %getitem_121, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_46 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg227_1, %arg228_1, %arg229_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_46 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_46,), kwargs = {})
%view_232 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_46, [1, 4096]), kwargs = {})
%mm_46 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_232, %t_46), kwargs = {})
%view_233 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_46, [1, 1, 11008]), kwargs = {})
%silu_6 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_233,), kwargs = {})
%choose_qparams_per_token_asymmetric_47 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_117, torch.int8), kwargs = {})
%getitem_122 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_47, 0), kwargs = {})
%getitem_123 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_47, 1), kwargs = {})
%quantize_per_token_47 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_117, %getitem_122, %getitem_123, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_47 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_47, %getitem_122, %getitem_123, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_47 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg230_1, %arg231_1, %arg232_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_47 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_47,), kwargs = {})
%view_234 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_47, [1, 4096]), kwargs = {})
%mm_47 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_234, %t_47), kwargs = {})
%view_235 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_47, [1, 1, 11008]), kwargs = {})
%mul_118 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_6, %view_235), kwargs = {})
%choose_qparams_per_token_asymmetric_48 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_118, torch.int8), kwargs = {})
%getitem_124 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_48, 0), kwargs = {})
%getitem_125 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_48, 1), kwargs = {})
%quantize_per_token_48 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_118, %getitem_124, %getitem_125, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_48 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_48, %getitem_124, %getitem_125, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_48 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg233_1, %arg234_1, %arg235_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_48 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_48,), kwargs = {})
%view_236 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_48, [1, 11008]), kwargs = {})
%mm_48 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_236, %t_48), kwargs = {})
%view_237 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_48, [1, 1, 4096]), kwargs = {})
%add_48 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_46, %view_237), kwargs = {})
%mul_119 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_48, %add_48), kwargs = {})
%mean_14 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_119, [-1], True), kwargs = {})
%add_49 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_14, 1e-06), kwargs = {})
%rsqrt_14 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_49,), kwargs = {})
%mul_120 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_48, %rsqrt_14), kwargs = {})
%mul_121 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_120, %arg15_1), kwargs = {})
%choose_qparams_per_token_asymmetric_49 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_121, torch.int8), kwargs = {})
%getitem_126 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_49, 0), kwargs = {})
%getitem_127 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_49, 1), kwargs = {})
%quantize_per_token_49 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_121, %getitem_126, %getitem_127, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_49 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_49, %getitem_126, %getitem_127, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_49 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg236_1, %arg237_1, %arg238_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_49 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_49,), kwargs = {})
%view_238 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_49, [1, 4096]), kwargs = {})
%mm_49 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_238, %t_49), kwargs = {})
%view_239 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_49, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_50 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_121, torch.int8), kwargs = {})
%getitem_128 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_50, 0), kwargs = {})
%getitem_129 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_50, 1), kwargs = {})
%quantize_per_token_50 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_121, %getitem_128, %getitem_129, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_50 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_50, %getitem_128, %getitem_129, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_50 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg239_1, %arg240_1, %arg241_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_50 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_50,), kwargs = {})
%view_240 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_50, [1, 4096]), kwargs = {})
%mm_50 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_240, %t_50), kwargs = {})
%view_241 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_50, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_51 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_121, torch.int8), kwargs = {})
%getitem_130 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_51, 0), kwargs = {})
%getitem_131 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_51, 1), kwargs = {})
%quantize_per_token_51 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_121, %getitem_130, %getitem_131, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_51 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_51, %getitem_130, %getitem_131, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_51 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg242_1, %arg243_1, %arg244_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_51 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_51,), kwargs = {})
%view_242 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_51, [1, 4096]), kwargs = {})
%mm_51 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_242, %t_51), kwargs = {})
%view_243 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_51, [1, 1, 4096]), kwargs = {})
%view_244 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_239, [1, 1, 32, 128]), kwargs = {})
%view_245 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_241, [1, 1, 32, 128]), kwargs = {})
%view_246 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_243, [1, 1, 32, 128]), kwargs = {})
%view_247 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_244, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_14 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_247, -1), kwargs = {})
%getitem_132 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_14, 0), kwargs = {})
%getitem_133 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_14, 1), kwargs = {})
%view_248 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_245, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_15 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_248, -1), kwargs = {})
%getitem_134 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_15, 0), kwargs = {})
%getitem_135 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_15, 1), kwargs = {})
%view_249 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_250 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_122 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_132, %view_249), kwargs = {})
%mul_123 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_133, %view_250), kwargs = {})
%sub_14 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_122, %mul_123), kwargs = {})
%mul_124 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_132, %view_250), kwargs = {})
%mul_125 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_133, %view_249), kwargs = {})
%add_50 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_124, %mul_125), kwargs = {})
%mul_126 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_134, %view_249), kwargs = {})
%mul_127 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_135, %view_250), kwargs = {})
%sub_15 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_126, %mul_127), kwargs = {})
%mul_128 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_134, %view_250), kwargs = {})
%mul_129 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_135, %view_249), kwargs = {})
%add_51 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_128, %mul_129), kwargs = {})
%stack_14 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_14, %add_50], -1), kwargs = {})
%view_251 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_14, [1, 1, 32, 128]), kwargs = {})
%stack_15 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_15, %add_51], -1), kwargs = {})
%view_252 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_15, [1, 1, 32, 128]), kwargs = {})
%transpose_35 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_251, 1, 2), kwargs = {})
%transpose_36 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_252, 1, 2), kwargs = {})
%transpose_37 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_246, 1, 2), kwargs = {})
%slice_71 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg245_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_72 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_71, 1, 0, 9223372036854775807), kwargs = {})
%view_253 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_36, [32, 1, 128]), kwargs = {})
%index_put_14 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_72, [None, None, %arg840_1], %view_253), kwargs = {})
%slice_73 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg245_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_28 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_73, %index_put_14, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_29 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg245_1, %slice_scatter_28, 0, 0, 9223372036854775807), kwargs = {})
%slice_76 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg246_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_77 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_76, 1, 0, 9223372036854775807), kwargs = {})
%view_254 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_37, [32, 1, 128]), kwargs = {})
%index_put_15 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_77, [None, None, %arg840_1], %view_254), kwargs = {})
%slice_78 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg246_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_30 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_78, %index_put_15, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_31 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg246_1, %slice_scatter_30, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_42 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg247_1, 0), kwargs = {})
%unsqueeze_43 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_42, 1), kwargs = {})
%index_9 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_43, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_45 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_29, 2), kwargs = {})
%expand_57 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_45, [1, 32, 1, 128, 128]), kwargs = {})
%clone_14 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_57,), kwargs = {memory_format: torch.contiguous_format})
%view_255 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_14, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_47 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_31, 2), kwargs = {})
%expand_59 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_47, [1, 32, 1, 128, 128]), kwargs = {})
%clone_15 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_59,), kwargs = {memory_format: torch.contiguous_format})
%view_256 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_15, [1, 32, 128, 128]), kwargs = {})
%zeros_like_7 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_9,), kwargs = {dtype: torch.float32})
%logical_not_7 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_9,), kwargs = {})
%masked_fill_7 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_7, %logical_not_7, -inf), kwargs = {})
%mul_130 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_35, 0.29730177875068026), kwargs = {})
%transpose_38 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_255, -2, -1), kwargs = {})
%mul_131 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_38, 0.29730177875068026), kwargs = {})
%expand_60 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_130, [1, 32, 1, 128]), kwargs = {})
%view_257 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_60, [32, 1, 128]), kwargs = {})
%expand_61 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_131, [1, 32, 128, 128]), kwargs = {})
%view_258 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_61, [32, 128, 128]), kwargs = {})
%bmm_14 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_257, %view_258), kwargs = {})
%view_259 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_14, [1, 32, 1, 128]), kwargs = {})
%add_52 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_259, %masked_fill_7), kwargs = {})
%_softmax_7 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_52, -1, False), kwargs = {})
%expand_62 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_7, [1, 32, 1, 128]), kwargs = {})
%view_260 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_62, [32, 1, 128]), kwargs = {})
%expand_63 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_256, [1, 32, 128, 128]), kwargs = {})
%view_261 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_63, [32, 128, 128]), kwargs = {})
%bmm_15 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_260, %view_261), kwargs = {})
%view_262 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_15, [1, 32, 1, 128]), kwargs = {})
%transpose_39 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_262, 1, 2), kwargs = {})
%view_263 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_39, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_52 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_263, torch.int8), kwargs = {})
%getitem_136 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_52, 0), kwargs = {})
%getitem_137 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_52, 1), kwargs = {})
%quantize_per_token_52 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_263, %getitem_136, %getitem_137, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_52 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_52, %getitem_136, %getitem_137, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_52 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg248_1, %arg249_1, %arg250_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_52 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_52,), kwargs = {})
%view_264 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_52, [1, 4096]), kwargs = {})
%mm_52 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_264, %t_52), kwargs = {})
%view_265 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_52, [1, 1, 4096]), kwargs = {})
%add_53 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_48, %view_265), kwargs = {})
%mul_132 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_53, %add_53), kwargs = {})
%mean_15 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_132, [-1], True), kwargs = {})
%add_54 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_15, 1e-06), kwargs = {})
%rsqrt_15 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_54,), kwargs = {})
%mul_133 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_53, %rsqrt_15), kwargs = {})
%mul_134 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_133, %arg16_1), kwargs = {})
%choose_qparams_per_token_asymmetric_53 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_134, torch.int8), kwargs = {})
%getitem_138 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_53, 0), kwargs = {})
%getitem_139 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_53, 1), kwargs = {})
%quantize_per_token_53 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_134, %getitem_138, %getitem_139, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_53 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_53, %getitem_138, %getitem_139, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_53 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg251_1, %arg252_1, %arg253_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_53 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_53,), kwargs = {})
%view_266 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_53, [1, 4096]), kwargs = {})
%mm_53 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_266, %t_53), kwargs = {})
%view_267 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_53, [1, 1, 11008]), kwargs = {})
%silu_7 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_267,), kwargs = {})
%choose_qparams_per_token_asymmetric_54 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_134, torch.int8), kwargs = {})
%getitem_140 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_54, 0), kwargs = {})
%getitem_141 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_54, 1), kwargs = {})
%quantize_per_token_54 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_134, %getitem_140, %getitem_141, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_54 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_54, %getitem_140, %getitem_141, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_54 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg254_1, %arg255_1, %arg256_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_54 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_54,), kwargs = {})
%view_268 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_54, [1, 4096]), kwargs = {})
%mm_54 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_268, %t_54), kwargs = {})
%view_269 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_54, [1, 1, 11008]), kwargs = {})
%mul_135 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_7, %view_269), kwargs = {})
%choose_qparams_per_token_asymmetric_55 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_135, torch.int8), kwargs = {})
%getitem_142 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_55, 0), kwargs = {})
%getitem_143 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_55, 1), kwargs = {})
%quantize_per_token_55 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_135, %getitem_142, %getitem_143, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_55 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_55, %getitem_142, %getitem_143, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_55 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg257_1, %arg258_1, %arg259_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_55 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_55,), kwargs = {})
%view_270 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_55, [1, 11008]), kwargs = {})
%mm_55 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_270, %t_55), kwargs = {})
%view_271 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_55, [1, 1, 4096]), kwargs = {})
%add_55 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_53, %view_271), kwargs = {})
%mul_136 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_55, %add_55), kwargs = {})
%mean_16 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_136, [-1], True), kwargs = {})
%add_56 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_16, 1e-06), kwargs = {})
%rsqrt_16 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_56,), kwargs = {})
%mul_137 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_55, %rsqrt_16), kwargs = {})
%mul_138 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_137, %arg17_1), kwargs = {})
%choose_qparams_per_token_asymmetric_56 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_138, torch.int8), kwargs = {})
%getitem_144 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_56, 0), kwargs = {})
%getitem_145 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_56, 1), kwargs = {})
%quantize_per_token_56 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_138, %getitem_144, %getitem_145, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_56 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_56, %getitem_144, %getitem_145, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_56 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg260_1, %arg261_1, %arg262_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_56 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_56,), kwargs = {})
%view_272 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_56, [1, 4096]), kwargs = {})
%mm_56 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_272, %t_56), kwargs = {})
%view_273 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_56, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_57 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_138, torch.int8), kwargs = {})
%getitem_146 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_57, 0), kwargs = {})
%getitem_147 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_57, 1), kwargs = {})
%quantize_per_token_57 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_138, %getitem_146, %getitem_147, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_57 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_57, %getitem_146, %getitem_147, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_57 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg263_1, %arg264_1, %arg265_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_57 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_57,), kwargs = {})
%view_274 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_57, [1, 4096]), kwargs = {})
%mm_57 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_274, %t_57), kwargs = {})
%view_275 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_57, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_58 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_138, torch.int8), kwargs = {})
%getitem_148 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_58, 0), kwargs = {})
%getitem_149 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_58, 1), kwargs = {})
%quantize_per_token_58 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_138, %getitem_148, %getitem_149, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_58 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_58, %getitem_148, %getitem_149, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_58 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg266_1, %arg267_1, %arg268_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_58 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_58,), kwargs = {})
%view_276 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_58, [1, 4096]), kwargs = {})
%mm_58 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_276, %t_58), kwargs = {})
%view_277 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_58, [1, 1, 4096]), kwargs = {})
%view_278 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_273, [1, 1, 32, 128]), kwargs = {})
%view_279 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_275, [1, 1, 32, 128]), kwargs = {})
%view_280 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_277, [1, 1, 32, 128]), kwargs = {})
%view_281 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_278, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_16 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_281, -1), kwargs = {})
%getitem_150 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_16, 0), kwargs = {})
%getitem_151 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_16, 1), kwargs = {})
%view_282 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_279, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_17 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_282, -1), kwargs = {})
%getitem_152 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_17, 0), kwargs = {})
%getitem_153 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_17, 1), kwargs = {})
%view_283 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_284 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_139 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_150, %view_283), kwargs = {})
%mul_140 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_151, %view_284), kwargs = {})
%sub_16 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_139, %mul_140), kwargs = {})
%mul_141 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_150, %view_284), kwargs = {})
%mul_142 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_151, %view_283), kwargs = {})
%add_57 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_141, %mul_142), kwargs = {})
%mul_143 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_152, %view_283), kwargs = {})
%mul_144 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_153, %view_284), kwargs = {})
%sub_17 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_143, %mul_144), kwargs = {})
%mul_145 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_152, %view_284), kwargs = {})
%mul_146 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_153, %view_283), kwargs = {})
%add_58 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_145, %mul_146), kwargs = {})
%stack_16 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_16, %add_57], -1), kwargs = {})
%view_285 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_16, [1, 1, 32, 128]), kwargs = {})
%stack_17 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_17, %add_58], -1), kwargs = {})
%view_286 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_17, [1, 1, 32, 128]), kwargs = {})
%transpose_40 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_285, 1, 2), kwargs = {})
%transpose_41 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_286, 1, 2), kwargs = {})
%transpose_42 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_280, 1, 2), kwargs = {})
%slice_81 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg269_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_82 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_81, 1, 0, 9223372036854775807), kwargs = {})
%view_287 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_41, [32, 1, 128]), kwargs = {})
%index_put_16 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_82, [None, None, %arg840_1], %view_287), kwargs = {})
%slice_83 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg269_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_32 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_83, %index_put_16, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_33 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg269_1, %slice_scatter_32, 0, 0, 9223372036854775807), kwargs = {})
%slice_86 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg270_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_87 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_86, 1, 0, 9223372036854775807), kwargs = {})
%view_288 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_42, [32, 1, 128]), kwargs = {})
%index_put_17 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_87, [None, None, %arg840_1], %view_288), kwargs = {})
%slice_88 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg270_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_34 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_88, %index_put_17, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_35 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg270_1, %slice_scatter_34, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_48 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg271_1, 0), kwargs = {})
%unsqueeze_49 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_48, 1), kwargs = {})
%index_10 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_49, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_51 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_33, 2), kwargs = {})
%expand_65 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_51, [1, 32, 1, 128, 128]), kwargs = {})
%clone_16 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_65,), kwargs = {memory_format: torch.contiguous_format})
%view_289 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_16, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_53 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_35, 2), kwargs = {})
%expand_67 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_53, [1, 32, 1, 128, 128]), kwargs = {})
%clone_17 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_67,), kwargs = {memory_format: torch.contiguous_format})
%view_290 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_17, [1, 32, 128, 128]), kwargs = {})
%zeros_like_8 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_10,), kwargs = {dtype: torch.float32})
%logical_not_8 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_10,), kwargs = {})
%masked_fill_8 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_8, %logical_not_8, -inf), kwargs = {})
%mul_147 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_40, 0.29730177875068026), kwargs = {})
%transpose_43 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_289, -2, -1), kwargs = {})
%mul_148 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_43, 0.29730177875068026), kwargs = {})
%expand_68 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_147, [1, 32, 1, 128]), kwargs = {})
%view_291 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_68, [32, 1, 128]), kwargs = {})
%expand_69 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_148, [1, 32, 128, 128]), kwargs = {})
%view_292 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_69, [32, 128, 128]), kwargs = {})
%bmm_16 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_291, %view_292), kwargs = {})
%view_293 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_16, [1, 32, 1, 128]), kwargs = {})
%add_59 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_293, %masked_fill_8), kwargs = {})
%_softmax_8 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_59, -1, False), kwargs = {})
%expand_70 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_8, [1, 32, 1, 128]), kwargs = {})
%view_294 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_70, [32, 1, 128]), kwargs = {})
%expand_71 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_290, [1, 32, 128, 128]), kwargs = {})
%view_295 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_71, [32, 128, 128]), kwargs = {})
%bmm_17 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_294, %view_295), kwargs = {})
%view_296 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_17, [1, 32, 1, 128]), kwargs = {})
%transpose_44 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_296, 1, 2), kwargs = {})
%view_297 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_44, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_59 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_297, torch.int8), kwargs = {})
%getitem_154 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_59, 0), kwargs = {})
%getitem_155 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_59, 1), kwargs = {})
%quantize_per_token_59 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_297, %getitem_154, %getitem_155, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_59 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_59, %getitem_154, %getitem_155, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_59 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg272_1, %arg273_1, %arg274_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_59 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_59,), kwargs = {})
%view_298 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_59, [1, 4096]), kwargs = {})
%mm_59 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_298, %t_59), kwargs = {})
%view_299 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_59, [1, 1, 4096]), kwargs = {})
%add_60 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_55, %view_299), kwargs = {})
%mul_149 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_60, %add_60), kwargs = {})
%mean_17 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_149, [-1], True), kwargs = {})
%add_61 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_17, 1e-06), kwargs = {})
%rsqrt_17 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_61,), kwargs = {})
%mul_150 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_60, %rsqrt_17), kwargs = {})
%mul_151 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_150, %arg18_1), kwargs = {})
%choose_qparams_per_token_asymmetric_60 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_151, torch.int8), kwargs = {})
%getitem_156 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_60, 0), kwargs = {})
%getitem_157 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_60, 1), kwargs = {})
%quantize_per_token_60 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_151, %getitem_156, %getitem_157, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_60 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_60, %getitem_156, %getitem_157, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_60 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg275_1, %arg276_1, %arg277_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_60 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_60,), kwargs = {})
%view_300 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_60, [1, 4096]), kwargs = {})
%mm_60 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_300, %t_60), kwargs = {})
%view_301 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_60, [1, 1, 11008]), kwargs = {})
%silu_8 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_301,), kwargs = {})
%choose_qparams_per_token_asymmetric_61 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_151, torch.int8), kwargs = {})
%getitem_158 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_61, 0), kwargs = {})
%getitem_159 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_61, 1), kwargs = {})
%quantize_per_token_61 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_151, %getitem_158, %getitem_159, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_61 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_61, %getitem_158, %getitem_159, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_61 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg278_1, %arg279_1, %arg280_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_61 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_61,), kwargs = {})
%view_302 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_61, [1, 4096]), kwargs = {})
%mm_61 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_302, %t_61), kwargs = {})
%view_303 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_61, [1, 1, 11008]), kwargs = {})
%mul_152 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_8, %view_303), kwargs = {})
%choose_qparams_per_token_asymmetric_62 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_152, torch.int8), kwargs = {})
%getitem_160 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_62, 0), kwargs = {})
%getitem_161 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_62, 1), kwargs = {})
%quantize_per_token_62 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_152, %getitem_160, %getitem_161, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_62 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_62, %getitem_160, %getitem_161, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_62 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg281_1, %arg282_1, %arg283_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_62 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_62,), kwargs = {})
%view_304 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_62, [1, 11008]), kwargs = {})
%mm_62 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_304, %t_62), kwargs = {})
%view_305 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_62, [1, 1, 4096]), kwargs = {})
%add_62 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_60, %view_305), kwargs = {})
%mul_153 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_62, %add_62), kwargs = {})
%mean_18 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_153, [-1], True), kwargs = {})
%add_63 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_18, 1e-06), kwargs = {})
%rsqrt_18 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_63,), kwargs = {})
%mul_154 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_62, %rsqrt_18), kwargs = {})
%mul_155 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_154, %arg19_1), kwargs = {})
%choose_qparams_per_token_asymmetric_63 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_155, torch.int8), kwargs = {})
%getitem_162 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_63, 0), kwargs = {})
%getitem_163 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_63, 1), kwargs = {})
%quantize_per_token_63 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_155, %getitem_162, %getitem_163, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_63 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_63, %getitem_162, %getitem_163, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_63 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg284_1, %arg285_1, %arg286_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_63 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_63,), kwargs = {})
%view_306 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_63, [1, 4096]), kwargs = {})
%mm_63 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_306, %t_63), kwargs = {})
%view_307 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_63, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_64 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_155, torch.int8), kwargs = {})
%getitem_164 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_64, 0), kwargs = {})
%getitem_165 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_64, 1), kwargs = {})
%quantize_per_token_64 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_155, %getitem_164, %getitem_165, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_64 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_64, %getitem_164, %getitem_165, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_64 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg287_1, %arg288_1, %arg289_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_64 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_64,), kwargs = {})
%view_308 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_64, [1, 4096]), kwargs = {})
%mm_64 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_308, %t_64), kwargs = {})
%view_309 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_64, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_65 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_155, torch.int8), kwargs = {})
%getitem_166 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_65, 0), kwargs = {})
%getitem_167 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_65, 1), kwargs = {})
%quantize_per_token_65 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_155, %getitem_166, %getitem_167, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_65 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_65, %getitem_166, %getitem_167, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_65 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg290_1, %arg291_1, %arg292_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_65 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_65,), kwargs = {})
%view_310 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_65, [1, 4096]), kwargs = {})
%mm_65 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_310, %t_65), kwargs = {})
%view_311 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_65, [1, 1, 4096]), kwargs = {})
%view_312 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_307, [1, 1, 32, 128]), kwargs = {})
%view_313 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_309, [1, 1, 32, 128]), kwargs = {})
%view_314 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_311, [1, 1, 32, 128]), kwargs = {})
%view_315 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_312, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_18 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_315, -1), kwargs = {})
%getitem_168 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_18, 0), kwargs = {})
%getitem_169 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_18, 1), kwargs = {})
%view_316 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_313, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_19 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_316, -1), kwargs = {})
%getitem_170 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_19, 0), kwargs = {})
%getitem_171 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_19, 1), kwargs = {})
%view_317 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_318 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_156 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_168, %view_317), kwargs = {})
%mul_157 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_169, %view_318), kwargs = {})
%sub_18 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_156, %mul_157), kwargs = {})
%mul_158 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_168, %view_318), kwargs = {})
%mul_159 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_169, %view_317), kwargs = {})
%add_64 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_158, %mul_159), kwargs = {})
%mul_160 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_170, %view_317), kwargs = {})
%mul_161 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_171, %view_318), kwargs = {})
%sub_19 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_160, %mul_161), kwargs = {})
%mul_162 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_170, %view_318), kwargs = {})
%mul_163 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_171, %view_317), kwargs = {})
%add_65 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_162, %mul_163), kwargs = {})
%stack_18 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_18, %add_64], -1), kwargs = {})
%view_319 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_18, [1, 1, 32, 128]), kwargs = {})
%stack_19 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_19, %add_65], -1), kwargs = {})
%view_320 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_19, [1, 1, 32, 128]), kwargs = {})
%transpose_45 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_319, 1, 2), kwargs = {})
%transpose_46 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_320, 1, 2), kwargs = {})
%transpose_47 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_314, 1, 2), kwargs = {})
%slice_91 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg293_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_92 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_91, 1, 0, 9223372036854775807), kwargs = {})
%view_321 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_46, [32, 1, 128]), kwargs = {})
%index_put_18 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_92, [None, None, %arg840_1], %view_321), kwargs = {})
%slice_93 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg293_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_36 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_93, %index_put_18, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_37 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg293_1, %slice_scatter_36, 0, 0, 9223372036854775807), kwargs = {})
%slice_96 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg294_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_97 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_96, 1, 0, 9223372036854775807), kwargs = {})
%view_322 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_47, [32, 1, 128]), kwargs = {})
%index_put_19 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_97, [None, None, %arg840_1], %view_322), kwargs = {})
%slice_98 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg294_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_38 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_98, %index_put_19, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_39 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg294_1, %slice_scatter_38, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_54 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg295_1, 0), kwargs = {})
%unsqueeze_55 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_54, 1), kwargs = {})
%index_11 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_55, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_57 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_37, 2), kwargs = {})
%expand_73 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_57, [1, 32, 1, 128, 128]), kwargs = {})
%clone_18 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_73,), kwargs = {memory_format: torch.contiguous_format})
%view_323 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_18, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_59 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_39, 2), kwargs = {})
%expand_75 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_59, [1, 32, 1, 128, 128]), kwargs = {})
%clone_19 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_75,), kwargs = {memory_format: torch.contiguous_format})
%view_324 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_19, [1, 32, 128, 128]), kwargs = {})
%zeros_like_9 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_11,), kwargs = {dtype: torch.float32})
%logical_not_9 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_11,), kwargs = {})
%masked_fill_9 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_9, %logical_not_9, -inf), kwargs = {})
%mul_164 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_45, 0.29730177875068026), kwargs = {})
%transpose_48 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_323, -2, -1), kwargs = {})
%mul_165 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_48, 0.29730177875068026), kwargs = {})
%expand_76 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_164, [1, 32, 1, 128]), kwargs = {})
%view_325 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_76, [32, 1, 128]), kwargs = {})
%expand_77 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_165, [1, 32, 128, 128]), kwargs = {})
%view_326 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_77, [32, 128, 128]), kwargs = {})
%bmm_18 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_325, %view_326), kwargs = {})
%view_327 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_18, [1, 32, 1, 128]), kwargs = {})
%add_66 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_327, %masked_fill_9), kwargs = {})
%_softmax_9 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_66, -1, False), kwargs = {})
%expand_78 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_9, [1, 32, 1, 128]), kwargs = {})
%view_328 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_78, [32, 1, 128]), kwargs = {})
%expand_79 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_324, [1, 32, 128, 128]), kwargs = {})
%view_329 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_79, [32, 128, 128]), kwargs = {})
%bmm_19 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_328, %view_329), kwargs = {})
%view_330 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_19, [1, 32, 1, 128]), kwargs = {})
%transpose_49 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_330, 1, 2), kwargs = {})
%view_331 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_49, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_66 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_331, torch.int8), kwargs = {})
%getitem_172 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_66, 0), kwargs = {})
%getitem_173 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_66, 1), kwargs = {})
%quantize_per_token_66 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_331, %getitem_172, %getitem_173, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_66 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_66, %getitem_172, %getitem_173, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_66 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg296_1, %arg297_1, %arg298_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_66 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_66,), kwargs = {})
%view_332 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_66, [1, 4096]), kwargs = {})
%mm_66 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_332, %t_66), kwargs = {})
%view_333 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_66, [1, 1, 4096]), kwargs = {})
%add_67 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_62, %view_333), kwargs = {})
%mul_166 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_67, %add_67), kwargs = {})
%mean_19 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_166, [-1], True), kwargs = {})
%add_68 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_19, 1e-06), kwargs = {})
%rsqrt_19 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_68,), kwargs = {})
%mul_167 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_67, %rsqrt_19), kwargs = {})
%mul_168 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_167, %arg20_1), kwargs = {})
%choose_qparams_per_token_asymmetric_67 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_168, torch.int8), kwargs = {})
%getitem_174 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_67, 0), kwargs = {})
%getitem_175 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_67, 1), kwargs = {})
%quantize_per_token_67 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_168, %getitem_174, %getitem_175, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_67 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_67, %getitem_174, %getitem_175, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_67 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg299_1, %arg300_1, %arg301_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_67 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_67,), kwargs = {})
%view_334 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_67, [1, 4096]), kwargs = {})
%mm_67 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_334, %t_67), kwargs = {})
%view_335 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_67, [1, 1, 11008]), kwargs = {})
%silu_9 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_335,), kwargs = {})
%choose_qparams_per_token_asymmetric_68 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_168, torch.int8), kwargs = {})
%getitem_176 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_68, 0), kwargs = {})
%getitem_177 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_68, 1), kwargs = {})
%quantize_per_token_68 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_168, %getitem_176, %getitem_177, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_68 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_68, %getitem_176, %getitem_177, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_68 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg302_1, %arg303_1, %arg304_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_68 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_68,), kwargs = {})
%view_336 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_68, [1, 4096]), kwargs = {})
%mm_68 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_336, %t_68), kwargs = {})
%view_337 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_68, [1, 1, 11008]), kwargs = {})
%mul_169 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_9, %view_337), kwargs = {})
%choose_qparams_per_token_asymmetric_69 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_169, torch.int8), kwargs = {})
%getitem_178 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_69, 0), kwargs = {})
%getitem_179 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_69, 1), kwargs = {})
%quantize_per_token_69 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_169, %getitem_178, %getitem_179, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_69 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_69, %getitem_178, %getitem_179, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_69 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg305_1, %arg306_1, %arg307_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_69 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_69,), kwargs = {})
%view_338 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_69, [1, 11008]), kwargs = {})
%mm_69 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_338, %t_69), kwargs = {})
%view_339 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_69, [1, 1, 4096]), kwargs = {})
%add_69 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_67, %view_339), kwargs = {})
%mul_170 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_69, %add_69), kwargs = {})
%mean_20 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_170, [-1], True), kwargs = {})
%add_70 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_20, 1e-06), kwargs = {})
%rsqrt_20 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_70,), kwargs = {})
%mul_171 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_69, %rsqrt_20), kwargs = {})
%mul_172 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_171, %arg21_1), kwargs = {})
%choose_qparams_per_token_asymmetric_70 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_172, torch.int8), kwargs = {})
%getitem_180 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_70, 0), kwargs = {})
%getitem_181 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_70, 1), kwargs = {})
%quantize_per_token_70 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_172, %getitem_180, %getitem_181, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_70 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_70, %getitem_180, %getitem_181, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_70 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg308_1, %arg309_1, %arg310_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_70 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_70,), kwargs = {})
%view_340 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_70, [1, 4096]), kwargs = {})
%mm_70 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_340, %t_70), kwargs = {})
%view_341 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_70, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_71 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_172, torch.int8), kwargs = {})
%getitem_182 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_71, 0), kwargs = {})
%getitem_183 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_71, 1), kwargs = {})
%quantize_per_token_71 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_172, %getitem_182, %getitem_183, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_71 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_71, %getitem_182, %getitem_183, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_71 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg311_1, %arg312_1, %arg313_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_71 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_71,), kwargs = {})
%view_342 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_71, [1, 4096]), kwargs = {})
%mm_71 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_342, %t_71), kwargs = {})
%view_343 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_71, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_72 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_172, torch.int8), kwargs = {})
%getitem_184 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_72, 0), kwargs = {})
%getitem_185 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_72, 1), kwargs = {})
%quantize_per_token_72 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_172, %getitem_184, %getitem_185, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_72 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_72, %getitem_184, %getitem_185, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_72 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg314_1, %arg315_1, %arg316_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_72 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_72,), kwargs = {})
%view_344 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_72, [1, 4096]), kwargs = {})
%mm_72 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_344, %t_72), kwargs = {})
%view_345 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_72, [1, 1, 4096]), kwargs = {})
%view_346 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_341, [1, 1, 32, 128]), kwargs = {})
%view_347 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_343, [1, 1, 32, 128]), kwargs = {})
%view_348 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_345, [1, 1, 32, 128]), kwargs = {})
%view_349 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_346, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_20 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_349, -1), kwargs = {})
%getitem_186 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_20, 0), kwargs = {})
%getitem_187 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_20, 1), kwargs = {})
%view_350 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_347, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_21 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_350, -1), kwargs = {})
%getitem_188 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_21, 0), kwargs = {})
%getitem_189 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_21, 1), kwargs = {})
%view_351 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_352 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_173 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_186, %view_351), kwargs = {})
%mul_174 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_187, %view_352), kwargs = {})
%sub_20 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_173, %mul_174), kwargs = {})
%mul_175 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_186, %view_352), kwargs = {})
%mul_176 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_187, %view_351), kwargs = {})
%add_71 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_175, %mul_176), kwargs = {})
%mul_177 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_188, %view_351), kwargs = {})
%mul_178 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_189, %view_352), kwargs = {})
%sub_21 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_177, %mul_178), kwargs = {})
%mul_179 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_188, %view_352), kwargs = {})
%mul_180 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_189, %view_351), kwargs = {})
%add_72 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_179, %mul_180), kwargs = {})
%stack_20 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_20, %add_71], -1), kwargs = {})
%view_353 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_20, [1, 1, 32, 128]), kwargs = {})
%stack_21 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_21, %add_72], -1), kwargs = {})
%view_354 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_21, [1, 1, 32, 128]), kwargs = {})
%transpose_50 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_353, 1, 2), kwargs = {})
%transpose_51 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_354, 1, 2), kwargs = {})
%transpose_52 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_348, 1, 2), kwargs = {})
%slice_101 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg317_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_102 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_101, 1, 0, 9223372036854775807), kwargs = {})
%view_355 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_51, [32, 1, 128]), kwargs = {})
%index_put_20 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_102, [None, None, %arg840_1], %view_355), kwargs = {})
%slice_103 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg317_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_40 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_103, %index_put_20, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_41 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg317_1, %slice_scatter_40, 0, 0, 9223372036854775807), kwargs = {})
%slice_106 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg318_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_107 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_106, 1, 0, 9223372036854775807), kwargs = {})
%view_356 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_52, [32, 1, 128]), kwargs = {})
%index_put_21 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_107, [None, None, %arg840_1], %view_356), kwargs = {})
%slice_108 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg318_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_42 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_108, %index_put_21, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_43 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg318_1, %slice_scatter_42, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_60 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg319_1, 0), kwargs = {})
%unsqueeze_61 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_60, 1), kwargs = {})
%index_12 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_61, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_63 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_41, 2), kwargs = {})
%expand_81 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_63, [1, 32, 1, 128, 128]), kwargs = {})
%clone_20 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_81,), kwargs = {memory_format: torch.contiguous_format})
%view_357 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_20, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_65 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_43, 2), kwargs = {})
%expand_83 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_65, [1, 32, 1, 128, 128]), kwargs = {})
%clone_21 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_83,), kwargs = {memory_format: torch.contiguous_format})
%view_358 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_21, [1, 32, 128, 128]), kwargs = {})
%zeros_like_10 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_12,), kwargs = {dtype: torch.float32})
%logical_not_10 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_12,), kwargs = {})
%masked_fill_10 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_10, %logical_not_10, -inf), kwargs = {})
%mul_181 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_50, 0.29730177875068026), kwargs = {})
%transpose_53 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_357, -2, -1), kwargs = {})
%mul_182 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_53, 0.29730177875068026), kwargs = {})
%expand_84 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_181, [1, 32, 1, 128]), kwargs = {})
%view_359 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_84, [32, 1, 128]), kwargs = {})
%expand_85 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_182, [1, 32, 128, 128]), kwargs = {})
%view_360 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_85, [32, 128, 128]), kwargs = {})
%bmm_20 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_359, %view_360), kwargs = {})
%view_361 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_20, [1, 32, 1, 128]), kwargs = {})
%add_73 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_361, %masked_fill_10), kwargs = {})
%_softmax_10 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_73, -1, False), kwargs = {})
%expand_86 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_10, [1, 32, 1, 128]), kwargs = {})
%view_362 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_86, [32, 1, 128]), kwargs = {})
%expand_87 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_358, [1, 32, 128, 128]), kwargs = {})
%view_363 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_87, [32, 128, 128]), kwargs = {})
%bmm_21 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_362, %view_363), kwargs = {})
%view_364 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_21, [1, 32, 1, 128]), kwargs = {})
%transpose_54 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_364, 1, 2), kwargs = {})
%view_365 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_54, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_73 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_365, torch.int8), kwargs = {})
%getitem_190 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_73, 0), kwargs = {})
%getitem_191 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_73, 1), kwargs = {})
%quantize_per_token_73 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_365, %getitem_190, %getitem_191, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_73 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_73, %getitem_190, %getitem_191, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_73 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg320_1, %arg321_1, %arg322_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_73 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_73,), kwargs = {})
%view_366 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_73, [1, 4096]), kwargs = {})
%mm_73 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_366, %t_73), kwargs = {})
%view_367 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_73, [1, 1, 4096]), kwargs = {})
%add_74 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_69, %view_367), kwargs = {})
%mul_183 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_74, %add_74), kwargs = {})
%mean_21 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_183, [-1], True), kwargs = {})
%add_75 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_21, 1e-06), kwargs = {})
%rsqrt_21 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_75,), kwargs = {})
%mul_184 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_74, %rsqrt_21), kwargs = {})
%mul_185 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_184, %arg22_1), kwargs = {})
%choose_qparams_per_token_asymmetric_74 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_185, torch.int8), kwargs = {})
%getitem_192 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_74, 0), kwargs = {})
%getitem_193 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_74, 1), kwargs = {})
%quantize_per_token_74 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_185, %getitem_192, %getitem_193, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_74 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_74, %getitem_192, %getitem_193, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_74 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg323_1, %arg324_1, %arg325_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_74 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_74,), kwargs = {})
%view_368 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_74, [1, 4096]), kwargs = {})
%mm_74 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_368, %t_74), kwargs = {})
%view_369 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_74, [1, 1, 11008]), kwargs = {})
%silu_10 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_369,), kwargs = {})
%choose_qparams_per_token_asymmetric_75 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_185, torch.int8), kwargs = {})
%getitem_194 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_75, 0), kwargs = {})
%getitem_195 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_75, 1), kwargs = {})
%quantize_per_token_75 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_185, %getitem_194, %getitem_195, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_75 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_75, %getitem_194, %getitem_195, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_75 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg326_1, %arg327_1, %arg328_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_75 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_75,), kwargs = {})
%view_370 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_75, [1, 4096]), kwargs = {})
%mm_75 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_370, %t_75), kwargs = {})
%view_371 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_75, [1, 1, 11008]), kwargs = {})
%mul_186 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_10, %view_371), kwargs = {})
%choose_qparams_per_token_asymmetric_76 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_186, torch.int8), kwargs = {})
%getitem_196 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_76, 0), kwargs = {})
%getitem_197 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_76, 1), kwargs = {})
%quantize_per_token_76 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_186, %getitem_196, %getitem_197, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_76 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_76, %getitem_196, %getitem_197, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_76 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg329_1, %arg330_1, %arg331_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_76 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_76,), kwargs = {})
%view_372 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_76, [1, 11008]), kwargs = {})
%mm_76 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_372, %t_76), kwargs = {})
%view_373 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_76, [1, 1, 4096]), kwargs = {})
%add_76 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_74, %view_373), kwargs = {})
%mul_187 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_76, %add_76), kwargs = {})
%mean_22 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_187, [-1], True), kwargs = {})
%add_77 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_22, 1e-06), kwargs = {})
%rsqrt_22 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_77,), kwargs = {})
%mul_188 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_76, %rsqrt_22), kwargs = {})
%mul_189 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_188, %arg23_1), kwargs = {})
%choose_qparams_per_token_asymmetric_77 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_189, torch.int8), kwargs = {})
%getitem_198 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_77, 0), kwargs = {})
%getitem_199 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_77, 1), kwargs = {})
%quantize_per_token_77 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_189, %getitem_198, %getitem_199, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_77 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_77, %getitem_198, %getitem_199, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_77 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg332_1, %arg333_1, %arg334_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_77 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_77,), kwargs = {})
%view_374 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_77, [1, 4096]), kwargs = {})
%mm_77 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_374, %t_77), kwargs = {})
%view_375 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_77, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_78 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_189, torch.int8), kwargs = {})
%getitem_200 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_78, 0), kwargs = {})
%getitem_201 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_78, 1), kwargs = {})
%quantize_per_token_78 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_189, %getitem_200, %getitem_201, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_78 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_78, %getitem_200, %getitem_201, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_78 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg335_1, %arg336_1, %arg337_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_78 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_78,), kwargs = {})
%view_376 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_78, [1, 4096]), kwargs = {})
%mm_78 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_376, %t_78), kwargs = {})
%view_377 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_78, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_79 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_189, torch.int8), kwargs = {})
%getitem_202 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_79, 0), kwargs = {})
%getitem_203 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_79, 1), kwargs = {})
%quantize_per_token_79 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_189, %getitem_202, %getitem_203, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_79 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_79, %getitem_202, %getitem_203, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_79 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg338_1, %arg339_1, %arg340_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_79 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_79,), kwargs = {})
%view_378 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_79, [1, 4096]), kwargs = {})
%mm_79 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_378, %t_79), kwargs = {})
%view_379 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_79, [1, 1, 4096]), kwargs = {})
%view_380 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_375, [1, 1, 32, 128]), kwargs = {})
%view_381 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_377, [1, 1, 32, 128]), kwargs = {})
%view_382 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_379, [1, 1, 32, 128]), kwargs = {})
%view_383 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_380, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_22 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_383, -1), kwargs = {})
%getitem_204 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_22, 0), kwargs = {})
%getitem_205 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_22, 1), kwargs = {})
%view_384 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_381, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_23 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_384, -1), kwargs = {})
%getitem_206 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_23, 0), kwargs = {})
%getitem_207 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_23, 1), kwargs = {})
%view_385 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_386 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_190 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_204, %view_385), kwargs = {})
%mul_191 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_205, %view_386), kwargs = {})
%sub_22 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_190, %mul_191), kwargs = {})
%mul_192 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_204, %view_386), kwargs = {})
%mul_193 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_205, %view_385), kwargs = {})
%add_78 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_192, %mul_193), kwargs = {})
%mul_194 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_206, %view_385), kwargs = {})
%mul_195 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_207, %view_386), kwargs = {})
%sub_23 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_194, %mul_195), kwargs = {})
%mul_196 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_206, %view_386), kwargs = {})
%mul_197 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_207, %view_385), kwargs = {})
%add_79 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_196, %mul_197), kwargs = {})
%stack_22 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_22, %add_78], -1), kwargs = {})
%view_387 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_22, [1, 1, 32, 128]), kwargs = {})
%stack_23 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_23, %add_79], -1), kwargs = {})
%view_388 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_23, [1, 1, 32, 128]), kwargs = {})
%transpose_55 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_387, 1, 2), kwargs = {})
%transpose_56 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_388, 1, 2), kwargs = {})
%transpose_57 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_382, 1, 2), kwargs = {})
%slice_111 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg341_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_112 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_111, 1, 0, 9223372036854775807), kwargs = {})
%view_389 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_56, [32, 1, 128]), kwargs = {})
%index_put_22 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_112, [None, None, %arg840_1], %view_389), kwargs = {})
%slice_113 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg341_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_44 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_113, %index_put_22, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_45 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg341_1, %slice_scatter_44, 0, 0, 9223372036854775807), kwargs = {})
%slice_116 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg342_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_117 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_116, 1, 0, 9223372036854775807), kwargs = {})
%view_390 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_57, [32, 1, 128]), kwargs = {})
%index_put_23 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_117, [None, None, %arg840_1], %view_390), kwargs = {})
%slice_118 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg342_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_46 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_118, %index_put_23, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_47 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg342_1, %slice_scatter_46, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_66 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg343_1, 0), kwargs = {})
%unsqueeze_67 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_66, 1), kwargs = {})
%index_13 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_67, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_69 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_45, 2), kwargs = {})
%expand_89 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_69, [1, 32, 1, 128, 128]), kwargs = {})
%clone_22 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_89,), kwargs = {memory_format: torch.contiguous_format})
%view_391 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_22, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_71 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_47, 2), kwargs = {})
%expand_91 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_71, [1, 32, 1, 128, 128]), kwargs = {})
%clone_23 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_91,), kwargs = {memory_format: torch.contiguous_format})
%view_392 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_23, [1, 32, 128, 128]), kwargs = {})
%zeros_like_11 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_13,), kwargs = {dtype: torch.float32})
%logical_not_11 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_13,), kwargs = {})
%masked_fill_11 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_11, %logical_not_11, -inf), kwargs = {})
%mul_198 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_55, 0.29730177875068026), kwargs = {})
%transpose_58 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_391, -2, -1), kwargs = {})
%mul_199 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_58, 0.29730177875068026), kwargs = {})
%expand_92 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_198, [1, 32, 1, 128]), kwargs = {})
%view_393 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_92, [32, 1, 128]), kwargs = {})
%expand_93 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_199, [1, 32, 128, 128]), kwargs = {})
%view_394 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_93, [32, 128, 128]), kwargs = {})
%bmm_22 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_393, %view_394), kwargs = {})
%view_395 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_22, [1, 32, 1, 128]), kwargs = {})
%add_80 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_395, %masked_fill_11), kwargs = {})
%_softmax_11 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_80, -1, False), kwargs = {})
%expand_94 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_11, [1, 32, 1, 128]), kwargs = {})
%view_396 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_94, [32, 1, 128]), kwargs = {})
%expand_95 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_392, [1, 32, 128, 128]), kwargs = {})
%view_397 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_95, [32, 128, 128]), kwargs = {})
%bmm_23 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_396, %view_397), kwargs = {})
%view_398 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_23, [1, 32, 1, 128]), kwargs = {})
%transpose_59 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_398, 1, 2), kwargs = {})
%view_399 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_59, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_80 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_399, torch.int8), kwargs = {})
%getitem_208 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_80, 0), kwargs = {})
%getitem_209 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_80, 1), kwargs = {})
%quantize_per_token_80 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_399, %getitem_208, %getitem_209, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_80 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_80, %getitem_208, %getitem_209, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_80 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg344_1, %arg345_1, %arg346_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_80 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_80,), kwargs = {})
%view_400 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_80, [1, 4096]), kwargs = {})
%mm_80 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_400, %t_80), kwargs = {})
%view_401 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_80, [1, 1, 4096]), kwargs = {})
%add_81 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_76, %view_401), kwargs = {})
%mul_200 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_81, %add_81), kwargs = {})
%mean_23 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_200, [-1], True), kwargs = {})
%add_82 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_23, 1e-06), kwargs = {})
%rsqrt_23 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_82,), kwargs = {})
%mul_201 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_81, %rsqrt_23), kwargs = {})
%mul_202 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_201, %arg24_1), kwargs = {})
%choose_qparams_per_token_asymmetric_81 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_202, torch.int8), kwargs = {})
%getitem_210 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_81, 0), kwargs = {})
%getitem_211 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_81, 1), kwargs = {})
%quantize_per_token_81 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_202, %getitem_210, %getitem_211, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_81 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_81, %getitem_210, %getitem_211, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_81 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg347_1, %arg348_1, %arg349_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_81 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_81,), kwargs = {})
%view_402 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_81, [1, 4096]), kwargs = {})
%mm_81 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_402, %t_81), kwargs = {})
%view_403 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_81, [1, 1, 11008]), kwargs = {})
%silu_11 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_403,), kwargs = {})
%choose_qparams_per_token_asymmetric_82 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_202, torch.int8), kwargs = {})
%getitem_212 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_82, 0), kwargs = {})
%getitem_213 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_82, 1), kwargs = {})
%quantize_per_token_82 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_202, %getitem_212, %getitem_213, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_82 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_82, %getitem_212, %getitem_213, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_82 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg350_1, %arg351_1, %arg352_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_82 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_82,), kwargs = {})
%view_404 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_82, [1, 4096]), kwargs = {})
%mm_82 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_404, %t_82), kwargs = {})
%view_405 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_82, [1, 1, 11008]), kwargs = {})
%mul_203 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_11, %view_405), kwargs = {})
%choose_qparams_per_token_asymmetric_83 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_203, torch.int8), kwargs = {})
%getitem_214 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_83, 0), kwargs = {})
%getitem_215 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_83, 1), kwargs = {})
%quantize_per_token_83 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_203, %getitem_214, %getitem_215, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_83 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_83, %getitem_214, %getitem_215, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_83 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg353_1, %arg354_1, %arg355_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_83 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_83,), kwargs = {})
%view_406 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_83, [1, 11008]), kwargs = {})
%mm_83 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_406, %t_83), kwargs = {})
%view_407 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_83, [1, 1, 4096]), kwargs = {})
%add_83 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_81, %view_407), kwargs = {})
%mul_204 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_83, %add_83), kwargs = {})
%mean_24 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_204, [-1], True), kwargs = {})
%add_84 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_24, 1e-06), kwargs = {})
%rsqrt_24 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_84,), kwargs = {})
%mul_205 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_83, %rsqrt_24), kwargs = {})
%mul_206 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_205, %arg25_1), kwargs = {})
%choose_qparams_per_token_asymmetric_84 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_206, torch.int8), kwargs = {})
%getitem_216 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_84, 0), kwargs = {})
%getitem_217 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_84, 1), kwargs = {})
%quantize_per_token_84 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_206, %getitem_216, %getitem_217, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_84 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_84, %getitem_216, %getitem_217, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_84 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg356_1, %arg357_1, %arg358_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_84 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_84,), kwargs = {})
%view_408 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_84, [1, 4096]), kwargs = {})
%mm_84 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_408, %t_84), kwargs = {})
%view_409 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_84, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_85 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_206, torch.int8), kwargs = {})
%getitem_218 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_85, 0), kwargs = {})
%getitem_219 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_85, 1), kwargs = {})
%quantize_per_token_85 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_206, %getitem_218, %getitem_219, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_85 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_85, %getitem_218, %getitem_219, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_85 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg359_1, %arg360_1, %arg361_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_85 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_85,), kwargs = {})
%view_410 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_85, [1, 4096]), kwargs = {})
%mm_85 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_410, %t_85), kwargs = {})
%view_411 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_85, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_86 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_206, torch.int8), kwargs = {})
%getitem_220 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_86, 0), kwargs = {})
%getitem_221 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_86, 1), kwargs = {})
%quantize_per_token_86 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_206, %getitem_220, %getitem_221, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_86 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_86, %getitem_220, %getitem_221, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_86 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg362_1, %arg363_1, %arg364_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_86 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_86,), kwargs = {})
%view_412 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_86, [1, 4096]), kwargs = {})
%mm_86 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_412, %t_86), kwargs = {})
%view_413 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_86, [1, 1, 4096]), kwargs = {})
%view_414 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_409, [1, 1, 32, 128]), kwargs = {})
%view_415 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_411, [1, 1, 32, 128]), kwargs = {})
%view_416 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_413, [1, 1, 32, 128]), kwargs = {})
%view_417 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_414, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_24 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_417, -1), kwargs = {})
%getitem_222 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_24, 0), kwargs = {})
%getitem_223 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_24, 1), kwargs = {})
%view_418 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_415, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_25 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_418, -1), kwargs = {})
%getitem_224 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_25, 0), kwargs = {})
%getitem_225 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_25, 1), kwargs = {})
%view_419 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_420 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_207 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_222, %view_419), kwargs = {})
%mul_208 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_223, %view_420), kwargs = {})
%sub_24 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_207, %mul_208), kwargs = {})
%mul_209 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_222, %view_420), kwargs = {})
%mul_210 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_223, %view_419), kwargs = {})
%add_85 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_209, %mul_210), kwargs = {})
%mul_211 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_224, %view_419), kwargs = {})
%mul_212 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_225, %view_420), kwargs = {})
%sub_25 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_211, %mul_212), kwargs = {})
%mul_213 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_224, %view_420), kwargs = {})
%mul_214 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_225, %view_419), kwargs = {})
%add_86 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_213, %mul_214), kwargs = {})
%stack_24 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_24, %add_85], -1), kwargs = {})
%view_421 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_24, [1, 1, 32, 128]), kwargs = {})
%stack_25 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_25, %add_86], -1), kwargs = {})
%view_422 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_25, [1, 1, 32, 128]), kwargs = {})
%transpose_60 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_421, 1, 2), kwargs = {})
%transpose_61 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_422, 1, 2), kwargs = {})
%transpose_62 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_416, 1, 2), kwargs = {})
%slice_121 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg365_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_122 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_121, 1, 0, 9223372036854775807), kwargs = {})
%view_423 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_61, [32, 1, 128]), kwargs = {})
%index_put_24 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_122, [None, None, %arg840_1], %view_423), kwargs = {})
%slice_123 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg365_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_48 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_123, %index_put_24, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_49 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg365_1, %slice_scatter_48, 0, 0, 9223372036854775807), kwargs = {})
%slice_126 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg366_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_127 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_126, 1, 0, 9223372036854775807), kwargs = {})
%view_424 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_62, [32, 1, 128]), kwargs = {})
%index_put_25 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_127, [None, None, %arg840_1], %view_424), kwargs = {})
%slice_128 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg366_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_50 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_128, %index_put_25, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_51 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg366_1, %slice_scatter_50, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_72 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg367_1, 0), kwargs = {})
%unsqueeze_73 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_72, 1), kwargs = {})
%index_14 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_73, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_75 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_49, 2), kwargs = {})
%expand_97 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_75, [1, 32, 1, 128, 128]), kwargs = {})
%clone_24 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_97,), kwargs = {memory_format: torch.contiguous_format})
%view_425 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_24, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_77 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_51, 2), kwargs = {})
%expand_99 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_77, [1, 32, 1, 128, 128]), kwargs = {})
%clone_25 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_99,), kwargs = {memory_format: torch.contiguous_format})
%view_426 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_25, [1, 32, 128, 128]), kwargs = {})
%zeros_like_12 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_14,), kwargs = {dtype: torch.float32})
%logical_not_12 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_14,), kwargs = {})
%masked_fill_12 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_12, %logical_not_12, -inf), kwargs = {})
%mul_215 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_60, 0.29730177875068026), kwargs = {})
%transpose_63 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_425, -2, -1), kwargs = {})
%mul_216 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_63, 0.29730177875068026), kwargs = {})
%expand_100 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_215, [1, 32, 1, 128]), kwargs = {})
%view_427 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_100, [32, 1, 128]), kwargs = {})
%expand_101 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_216, [1, 32, 128, 128]), kwargs = {})
%view_428 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_101, [32, 128, 128]), kwargs = {})
%bmm_24 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_427, %view_428), kwargs = {})
%view_429 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_24, [1, 32, 1, 128]), kwargs = {})
%add_87 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_429, %masked_fill_12), kwargs = {})
%_softmax_12 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_87, -1, False), kwargs = {})
%expand_102 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_12, [1, 32, 1, 128]), kwargs = {})
%view_430 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_102, [32, 1, 128]), kwargs = {})
%expand_103 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_426, [1, 32, 128, 128]), kwargs = {})
%view_431 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_103, [32, 128, 128]), kwargs = {})
%bmm_25 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_430, %view_431), kwargs = {})
%view_432 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_25, [1, 32, 1, 128]), kwargs = {})
%transpose_64 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_432, 1, 2), kwargs = {})
%view_433 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_64, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_87 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_433, torch.int8), kwargs = {})
%getitem_226 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_87, 0), kwargs = {})
%getitem_227 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_87, 1), kwargs = {})
%quantize_per_token_87 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_433, %getitem_226, %getitem_227, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_87 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_87, %getitem_226, %getitem_227, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_87 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg368_1, %arg369_1, %arg370_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_87 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_87,), kwargs = {})
%view_434 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_87, [1, 4096]), kwargs = {})
%mm_87 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_434, %t_87), kwargs = {})
%view_435 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_87, [1, 1, 4096]), kwargs = {})
%add_88 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_83, %view_435), kwargs = {})
%mul_217 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_88, %add_88), kwargs = {})
%mean_25 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_217, [-1], True), kwargs = {})
%add_89 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_25, 1e-06), kwargs = {})
%rsqrt_25 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_89,), kwargs = {})
%mul_218 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_88, %rsqrt_25), kwargs = {})
%mul_219 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_218, %arg26_1), kwargs = {})
%choose_qparams_per_token_asymmetric_88 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_219, torch.int8), kwargs = {})
%getitem_228 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_88, 0), kwargs = {})
%getitem_229 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_88, 1), kwargs = {})
%quantize_per_token_88 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_219, %getitem_228, %getitem_229, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_88 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_88, %getitem_228, %getitem_229, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_88 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg371_1, %arg372_1, %arg373_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_88 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_88,), kwargs = {})
%view_436 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_88, [1, 4096]), kwargs = {})
%mm_88 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_436, %t_88), kwargs = {})
%view_437 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_88, [1, 1, 11008]), kwargs = {})
%silu_12 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_437,), kwargs = {})
%choose_qparams_per_token_asymmetric_89 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_219, torch.int8), kwargs = {})
%getitem_230 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_89, 0), kwargs = {})
%getitem_231 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_89, 1), kwargs = {})
%quantize_per_token_89 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_219, %getitem_230, %getitem_231, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_89 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_89, %getitem_230, %getitem_231, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_89 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg374_1, %arg375_1, %arg376_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_89 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_89,), kwargs = {})
%view_438 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_89, [1, 4096]), kwargs = {})
%mm_89 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_438, %t_89), kwargs = {})
%view_439 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_89, [1, 1, 11008]), kwargs = {})
%mul_220 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_12, %view_439), kwargs = {})
%choose_qparams_per_token_asymmetric_90 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_220, torch.int8), kwargs = {})
%getitem_232 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_90, 0), kwargs = {})
%getitem_233 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_90, 1), kwargs = {})
%quantize_per_token_90 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_220, %getitem_232, %getitem_233, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_90 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_90, %getitem_232, %getitem_233, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_90 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg377_1, %arg378_1, %arg379_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_90 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_90,), kwargs = {})
%view_440 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_90, [1, 11008]), kwargs = {})
%mm_90 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_440, %t_90), kwargs = {})
%view_441 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_90, [1, 1, 4096]), kwargs = {})
%add_90 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_88, %view_441), kwargs = {})
%mul_221 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_90, %add_90), kwargs = {})
%mean_26 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_221, [-1], True), kwargs = {})
%add_91 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_26, 1e-06), kwargs = {})
%rsqrt_26 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_91,), kwargs = {})
%mul_222 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_90, %rsqrt_26), kwargs = {})
%mul_223 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_222, %arg27_1), kwargs = {})
%choose_qparams_per_token_asymmetric_91 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_223, torch.int8), kwargs = {})
%getitem_234 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_91, 0), kwargs = {})
%getitem_235 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_91, 1), kwargs = {})
%quantize_per_token_91 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_223, %getitem_234, %getitem_235, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_91 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_91, %getitem_234, %getitem_235, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_91 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg380_1, %arg381_1, %arg382_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_91 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_91,), kwargs = {})
%view_442 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_91, [1, 4096]), kwargs = {})
%mm_91 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_442, %t_91), kwargs = {})
%view_443 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_91, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_92 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_223, torch.int8), kwargs = {})
%getitem_236 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_92, 0), kwargs = {})
%getitem_237 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_92, 1), kwargs = {})
%quantize_per_token_92 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_223, %getitem_236, %getitem_237, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_92 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_92, %getitem_236, %getitem_237, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_92 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg383_1, %arg384_1, %arg385_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_92 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_92,), kwargs = {})
%view_444 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_92, [1, 4096]), kwargs = {})
%mm_92 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_444, %t_92), kwargs = {})
%view_445 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_92, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_93 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_223, torch.int8), kwargs = {})
%getitem_238 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_93, 0), kwargs = {})
%getitem_239 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_93, 1), kwargs = {})
%quantize_per_token_93 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_223, %getitem_238, %getitem_239, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_93 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_93, %getitem_238, %getitem_239, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_93 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg386_1, %arg387_1, %arg388_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_93 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_93,), kwargs = {})
%view_446 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_93, [1, 4096]), kwargs = {})
%mm_93 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_446, %t_93), kwargs = {})
%view_447 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_93, [1, 1, 4096]), kwargs = {})
%view_448 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_443, [1, 1, 32, 128]), kwargs = {})
%view_449 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_445, [1, 1, 32, 128]), kwargs = {})
%view_450 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_447, [1, 1, 32, 128]), kwargs = {})
%view_451 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_448, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_26 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_451, -1), kwargs = {})
%getitem_240 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_26, 0), kwargs = {})
%getitem_241 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_26, 1), kwargs = {})
%view_452 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_449, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_27 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_452, -1), kwargs = {})
%getitem_242 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_27, 0), kwargs = {})
%getitem_243 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_27, 1), kwargs = {})
%view_453 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_454 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_224 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_240, %view_453), kwargs = {})
%mul_225 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_241, %view_454), kwargs = {})
%sub_26 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_224, %mul_225), kwargs = {})
%mul_226 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_240, %view_454), kwargs = {})
%mul_227 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_241, %view_453), kwargs = {})
%add_92 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_226, %mul_227), kwargs = {})
%mul_228 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_242, %view_453), kwargs = {})
%mul_229 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_243, %view_454), kwargs = {})
%sub_27 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_228, %mul_229), kwargs = {})
%mul_230 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_242, %view_454), kwargs = {})
%mul_231 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_243, %view_453), kwargs = {})
%add_93 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_230, %mul_231), kwargs = {})
%stack_26 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_26, %add_92], -1), kwargs = {})
%view_455 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_26, [1, 1, 32, 128]), kwargs = {})
%stack_27 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_27, %add_93], -1), kwargs = {})
%view_456 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_27, [1, 1, 32, 128]), kwargs = {})
%transpose_65 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_455, 1, 2), kwargs = {})
%transpose_66 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_456, 1, 2), kwargs = {})
%transpose_67 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_450, 1, 2), kwargs = {})
%slice_131 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg389_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_132 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_131, 1, 0, 9223372036854775807), kwargs = {})
%view_457 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_66, [32, 1, 128]), kwargs = {})
%index_put_26 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_132, [None, None, %arg840_1], %view_457), kwargs = {})
%slice_133 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg389_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_52 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_133, %index_put_26, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_53 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg389_1, %slice_scatter_52, 0, 0, 9223372036854775807), kwargs = {})
%slice_136 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg390_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_137 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_136, 1, 0, 9223372036854775807), kwargs = {})
%view_458 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_67, [32, 1, 128]), kwargs = {})
%index_put_27 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_137, [None, None, %arg840_1], %view_458), kwargs = {})
%slice_138 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg390_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_54 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_138, %index_put_27, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_55 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg390_1, %slice_scatter_54, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_78 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg391_1, 0), kwargs = {})
%unsqueeze_79 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_78, 1), kwargs = {})
%index_15 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_79, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_81 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_53, 2), kwargs = {})
%expand_105 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_81, [1, 32, 1, 128, 128]), kwargs = {})
%clone_26 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_105,), kwargs = {memory_format: torch.contiguous_format})
%view_459 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_26, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_83 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_55, 2), kwargs = {})
%expand_107 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_83, [1, 32, 1, 128, 128]), kwargs = {})
%clone_27 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_107,), kwargs = {memory_format: torch.contiguous_format})
%view_460 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_27, [1, 32, 128, 128]), kwargs = {})
%zeros_like_13 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_15,), kwargs = {dtype: torch.float32})
%logical_not_13 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_15,), kwargs = {})
%masked_fill_13 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_13, %logical_not_13, -inf), kwargs = {})
%mul_232 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_65, 0.29730177875068026), kwargs = {})
%transpose_68 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_459, -2, -1), kwargs = {})
%mul_233 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_68, 0.29730177875068026), kwargs = {})
%expand_108 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_232, [1, 32, 1, 128]), kwargs = {})
%view_461 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_108, [32, 1, 128]), kwargs = {})
%expand_109 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_233, [1, 32, 128, 128]), kwargs = {})
%view_462 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_109, [32, 128, 128]), kwargs = {})
%bmm_26 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_461, %view_462), kwargs = {})
%view_463 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_26, [1, 32, 1, 128]), kwargs = {})
%add_94 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_463, %masked_fill_13), kwargs = {})
%_softmax_13 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_94, -1, False), kwargs = {})
%expand_110 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_13, [1, 32, 1, 128]), kwargs = {})
%view_464 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_110, [32, 1, 128]), kwargs = {})
%expand_111 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_460, [1, 32, 128, 128]), kwargs = {})
%view_465 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_111, [32, 128, 128]), kwargs = {})
%bmm_27 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_464, %view_465), kwargs = {})
%view_466 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_27, [1, 32, 1, 128]), kwargs = {})
%transpose_69 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_466, 1, 2), kwargs = {})
%view_467 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_69, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_94 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_467, torch.int8), kwargs = {})
%getitem_244 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_94, 0), kwargs = {})
%getitem_245 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_94, 1), kwargs = {})
%quantize_per_token_94 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_467, %getitem_244, %getitem_245, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_94 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_94, %getitem_244, %getitem_245, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_94 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg392_1, %arg393_1, %arg394_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_94 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_94,), kwargs = {})
%view_468 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_94, [1, 4096]), kwargs = {})
%mm_94 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_468, %t_94), kwargs = {})
%view_469 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_94, [1, 1, 4096]), kwargs = {})
%add_95 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_90, %view_469), kwargs = {})
%mul_234 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_95, %add_95), kwargs = {})
%mean_27 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_234, [-1], True), kwargs = {})
%add_96 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_27, 1e-06), kwargs = {})
%rsqrt_27 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_96,), kwargs = {})
%mul_235 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_95, %rsqrt_27), kwargs = {})
%mul_236 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_235, %arg28_1), kwargs = {})
%choose_qparams_per_token_asymmetric_95 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_236, torch.int8), kwargs = {})
%getitem_246 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_95, 0), kwargs = {})
%getitem_247 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_95, 1), kwargs = {})
%quantize_per_token_95 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_236, %getitem_246, %getitem_247, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_95 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_95, %getitem_246, %getitem_247, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_95 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg395_1, %arg396_1, %arg397_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_95 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_95,), kwargs = {})
%view_470 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_95, [1, 4096]), kwargs = {})
%mm_95 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_470, %t_95), kwargs = {})
%view_471 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_95, [1, 1, 11008]), kwargs = {})
%silu_13 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_471,), kwargs = {})
%choose_qparams_per_token_asymmetric_96 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_236, torch.int8), kwargs = {})
%getitem_248 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_96, 0), kwargs = {})
%getitem_249 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_96, 1), kwargs = {})
%quantize_per_token_96 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_236, %getitem_248, %getitem_249, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_96 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_96, %getitem_248, %getitem_249, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_96 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg398_1, %arg399_1, %arg400_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_96 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_96,), kwargs = {})
%view_472 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_96, [1, 4096]), kwargs = {})
%mm_96 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_472, %t_96), kwargs = {})
%view_473 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_96, [1, 1, 11008]), kwargs = {})
%mul_237 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_13, %view_473), kwargs = {})
%choose_qparams_per_token_asymmetric_97 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_237, torch.int8), kwargs = {})
%getitem_250 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_97, 0), kwargs = {})
%getitem_251 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_97, 1), kwargs = {})
%quantize_per_token_97 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_237, %getitem_250, %getitem_251, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_97 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_97, %getitem_250, %getitem_251, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_97 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg401_1, %arg402_1, %arg403_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_97 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_97,), kwargs = {})
%view_474 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_97, [1, 11008]), kwargs = {})
%mm_97 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_474, %t_97), kwargs = {})
%view_475 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_97, [1, 1, 4096]), kwargs = {})
%add_97 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_95, %view_475), kwargs = {})
%mul_238 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_97, %add_97), kwargs = {})
%mean_28 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_238, [-1], True), kwargs = {})
%add_98 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_28, 1e-06), kwargs = {})
%rsqrt_28 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_98,), kwargs = {})
%mul_239 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_97, %rsqrt_28), kwargs = {})
%mul_240 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_239, %arg29_1), kwargs = {})
%choose_qparams_per_token_asymmetric_98 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_240, torch.int8), kwargs = {})
%getitem_252 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_98, 0), kwargs = {})
%getitem_253 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_98, 1), kwargs = {})
%quantize_per_token_98 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_240, %getitem_252, %getitem_253, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_98 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_98, %getitem_252, %getitem_253, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_98 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg404_1, %arg405_1, %arg406_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_98 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_98,), kwargs = {})
%view_476 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_98, [1, 4096]), kwargs = {})
%mm_98 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_476, %t_98), kwargs = {})
%view_477 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_98, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_99 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_240, torch.int8), kwargs = {})
%getitem_254 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_99, 0), kwargs = {})
%getitem_255 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_99, 1), kwargs = {})
%quantize_per_token_99 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_240, %getitem_254, %getitem_255, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_99 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_99, %getitem_254, %getitem_255, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_99 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg407_1, %arg408_1, %arg409_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_99 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_99,), kwargs = {})
%view_478 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_99, [1, 4096]), kwargs = {})
%mm_99 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_478, %t_99), kwargs = {})
%view_479 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_99, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_100 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_240, torch.int8), kwargs = {})
%getitem_256 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_100, 0), kwargs = {})
%getitem_257 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_100, 1), kwargs = {})
%quantize_per_token_100 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_240, %getitem_256, %getitem_257, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_100 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_100, %getitem_256, %getitem_257, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_100 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg410_1, %arg411_1, %arg412_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_100 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_100,), kwargs = {})
%view_480 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_100, [1, 4096]), kwargs = {})
%mm_100 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_480, %t_100), kwargs = {})
%view_481 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_100, [1, 1, 4096]), kwargs = {})
%view_482 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_477, [1, 1, 32, 128]), kwargs = {})
%view_483 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_479, [1, 1, 32, 128]), kwargs = {})
%view_484 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_481, [1, 1, 32, 128]), kwargs = {})
%view_485 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_482, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_28 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_485, -1), kwargs = {})
%getitem_258 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_28, 0), kwargs = {})
%getitem_259 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_28, 1), kwargs = {})
%view_486 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_483, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_29 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_486, -1), kwargs = {})
%getitem_260 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_29, 0), kwargs = {})
%getitem_261 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_29, 1), kwargs = {})
%view_487 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_488 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_241 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_258, %view_487), kwargs = {})
%mul_242 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_259, %view_488), kwargs = {})
%sub_28 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_241, %mul_242), kwargs = {})
%mul_243 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_258, %view_488), kwargs = {})
%mul_244 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_259, %view_487), kwargs = {})
%add_99 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_243, %mul_244), kwargs = {})
%mul_245 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_260, %view_487), kwargs = {})
%mul_246 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_261, %view_488), kwargs = {})
%sub_29 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_245, %mul_246), kwargs = {})
%mul_247 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_260, %view_488), kwargs = {})
%mul_248 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_261, %view_487), kwargs = {})
%add_100 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_247, %mul_248), kwargs = {})
%stack_28 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_28, %add_99], -1), kwargs = {})
%view_489 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_28, [1, 1, 32, 128]), kwargs = {})
%stack_29 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_29, %add_100], -1), kwargs = {})
%view_490 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_29, [1, 1, 32, 128]), kwargs = {})
%transpose_70 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_489, 1, 2), kwargs = {})
%transpose_71 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_490, 1, 2), kwargs = {})
%transpose_72 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_484, 1, 2), kwargs = {})
%slice_141 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg413_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_142 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_141, 1, 0, 9223372036854775807), kwargs = {})
%view_491 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_71, [32, 1, 128]), kwargs = {})
%index_put_28 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_142, [None, None, %arg840_1], %view_491), kwargs = {})
%slice_143 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg413_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_56 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_143, %index_put_28, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_57 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg413_1, %slice_scatter_56, 0, 0, 9223372036854775807), kwargs = {})
%slice_146 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg414_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_147 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_146, 1, 0, 9223372036854775807), kwargs = {})
%view_492 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_72, [32, 1, 128]), kwargs = {})
%index_put_29 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_147, [None, None, %arg840_1], %view_492), kwargs = {})
%slice_148 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg414_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_58 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_148, %index_put_29, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_59 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg414_1, %slice_scatter_58, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_84 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg415_1, 0), kwargs = {})
%unsqueeze_85 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_84, 1), kwargs = {})
%index_16 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_85, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_87 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_57, 2), kwargs = {})
%expand_113 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_87, [1, 32, 1, 128, 128]), kwargs = {})
%clone_28 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_113,), kwargs = {memory_format: torch.contiguous_format})
%view_493 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_28, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_89 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_59, 2), kwargs = {})
%expand_115 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_89, [1, 32, 1, 128, 128]), kwargs = {})
%clone_29 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_115,), kwargs = {memory_format: torch.contiguous_format})
%view_494 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_29, [1, 32, 128, 128]), kwargs = {})
%zeros_like_14 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_16,), kwargs = {dtype: torch.float32})
%logical_not_14 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_16,), kwargs = {})
%masked_fill_14 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_14, %logical_not_14, -inf), kwargs = {})
%mul_249 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_70, 0.29730177875068026), kwargs = {})
%transpose_73 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_493, -2, -1), kwargs = {})
%mul_250 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_73, 0.29730177875068026), kwargs = {})
%expand_116 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_249, [1, 32, 1, 128]), kwargs = {})
%view_495 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_116, [32, 1, 128]), kwargs = {})
%expand_117 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_250, [1, 32, 128, 128]), kwargs = {})
%view_496 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_117, [32, 128, 128]), kwargs = {})
%bmm_28 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_495, %view_496), kwargs = {})
%view_497 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_28, [1, 32, 1, 128]), kwargs = {})
%add_101 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_497, %masked_fill_14), kwargs = {})
%_softmax_14 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_101, -1, False), kwargs = {})
%expand_118 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_14, [1, 32, 1, 128]), kwargs = {})
%view_498 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_118, [32, 1, 128]), kwargs = {})
%expand_119 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_494, [1, 32, 128, 128]), kwargs = {})
%view_499 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_119, [32, 128, 128]), kwargs = {})
%bmm_29 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_498, %view_499), kwargs = {})
%view_500 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_29, [1, 32, 1, 128]), kwargs = {})
%transpose_74 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_500, 1, 2), kwargs = {})
%view_501 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_74, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_101 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_501, torch.int8), kwargs = {})
%getitem_262 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_101, 0), kwargs = {})
%getitem_263 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_101, 1), kwargs = {})
%quantize_per_token_101 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_501, %getitem_262, %getitem_263, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_101 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_101, %getitem_262, %getitem_263, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_101 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg416_1, %arg417_1, %arg418_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_101 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_101,), kwargs = {})
%view_502 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_101, [1, 4096]), kwargs = {})
%mm_101 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_502, %t_101), kwargs = {})
%view_503 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_101, [1, 1, 4096]), kwargs = {})
%add_102 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_97, %view_503), kwargs = {})
%mul_251 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_102, %add_102), kwargs = {})
%mean_29 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_251, [-1], True), kwargs = {})
%add_103 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_29, 1e-06), kwargs = {})
%rsqrt_29 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_103,), kwargs = {})
%mul_252 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_102, %rsqrt_29), kwargs = {})
%mul_253 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_252, %arg30_1), kwargs = {})
%choose_qparams_per_token_asymmetric_102 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_253, torch.int8), kwargs = {})
%getitem_264 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_102, 0), kwargs = {})
%getitem_265 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_102, 1), kwargs = {})
%quantize_per_token_102 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_253, %getitem_264, %getitem_265, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_102 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_102, %getitem_264, %getitem_265, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_102 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg419_1, %arg420_1, %arg421_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_102 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_102,), kwargs = {})
%view_504 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_102, [1, 4096]), kwargs = {})
%mm_102 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_504, %t_102), kwargs = {})
%view_505 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_102, [1, 1, 11008]), kwargs = {})
%silu_14 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_505,), kwargs = {})
%choose_qparams_per_token_asymmetric_103 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_253, torch.int8), kwargs = {})
%getitem_266 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_103, 0), kwargs = {})
%getitem_267 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_103, 1), kwargs = {})
%quantize_per_token_103 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_253, %getitem_266, %getitem_267, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_103 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_103, %getitem_266, %getitem_267, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_103 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg422_1, %arg423_1, %arg424_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_103 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_103,), kwargs = {})
%view_506 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_103, [1, 4096]), kwargs = {})
%mm_103 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_506, %t_103), kwargs = {})
%view_507 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_103, [1, 1, 11008]), kwargs = {})
%mul_254 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_14, %view_507), kwargs = {})
%choose_qparams_per_token_asymmetric_104 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_254, torch.int8), kwargs = {})
%getitem_268 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_104, 0), kwargs = {})
%getitem_269 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_104, 1), kwargs = {})
%quantize_per_token_104 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_254, %getitem_268, %getitem_269, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_104 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_104, %getitem_268, %getitem_269, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_104 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg425_1, %arg426_1, %arg427_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_104 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_104,), kwargs = {})
%view_508 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_104, [1, 11008]), kwargs = {})
%mm_104 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_508, %t_104), kwargs = {})
%view_509 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_104, [1, 1, 4096]), kwargs = {})
%add_104 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_102, %view_509), kwargs = {})
%mul_255 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_104, %add_104), kwargs = {})
%mean_30 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_255, [-1], True), kwargs = {})
%add_105 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_30, 1e-06), kwargs = {})
%rsqrt_30 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_105,), kwargs = {})
%mul_256 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_104, %rsqrt_30), kwargs = {})
%mul_257 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_256, %arg31_1), kwargs = {})
%choose_qparams_per_token_asymmetric_105 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_257, torch.int8), kwargs = {})
%getitem_270 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_105, 0), kwargs = {})
%getitem_271 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_105, 1), kwargs = {})
%quantize_per_token_105 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_257, %getitem_270, %getitem_271, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_105 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_105, %getitem_270, %getitem_271, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_105 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg428_1, %arg429_1, %arg430_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_105 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_105,), kwargs = {})
%view_510 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_105, [1, 4096]), kwargs = {})
%mm_105 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_510, %t_105), kwargs = {})
%view_511 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_105, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_106 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_257, torch.int8), kwargs = {})
%getitem_272 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_106, 0), kwargs = {})
%getitem_273 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_106, 1), kwargs = {})
%quantize_per_token_106 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_257, %getitem_272, %getitem_273, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_106 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_106, %getitem_272, %getitem_273, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_106 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg431_1, %arg432_1, %arg433_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_106 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_106,), kwargs = {})
%view_512 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_106, [1, 4096]), kwargs = {})
%mm_106 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_512, %t_106), kwargs = {})
%view_513 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_106, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_107 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_257, torch.int8), kwargs = {})
%getitem_274 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_107, 0), kwargs = {})
%getitem_275 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_107, 1), kwargs = {})
%quantize_per_token_107 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_257, %getitem_274, %getitem_275, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_107 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_107, %getitem_274, %getitem_275, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_107 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg434_1, %arg435_1, %arg436_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_107 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_107,), kwargs = {})
%view_514 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_107, [1, 4096]), kwargs = {})
%mm_107 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_514, %t_107), kwargs = {})
%view_515 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_107, [1, 1, 4096]), kwargs = {})
%view_516 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_511, [1, 1, 32, 128]), kwargs = {})
%view_517 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_513, [1, 1, 32, 128]), kwargs = {})
%view_518 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_515, [1, 1, 32, 128]), kwargs = {})
%view_519 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_516, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_30 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_519, -1), kwargs = {})
%getitem_276 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_30, 0), kwargs = {})
%getitem_277 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_30, 1), kwargs = {})
%view_520 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_517, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_31 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_520, -1), kwargs = {})
%getitem_278 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_31, 0), kwargs = {})
%getitem_279 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_31, 1), kwargs = {})
%view_521 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_522 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_258 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_276, %view_521), kwargs = {})
%mul_259 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_277, %view_522), kwargs = {})
%sub_30 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_258, %mul_259), kwargs = {})
%mul_260 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_276, %view_522), kwargs = {})
%mul_261 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_277, %view_521), kwargs = {})
%add_106 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_260, %mul_261), kwargs = {})
%mul_262 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_278, %view_521), kwargs = {})
%mul_263 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_279, %view_522), kwargs = {})
%sub_31 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_262, %mul_263), kwargs = {})
%mul_264 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_278, %view_522), kwargs = {})
%mul_265 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_279, %view_521), kwargs = {})
%add_107 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_264, %mul_265), kwargs = {})
%stack_30 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_30, %add_106], -1), kwargs = {})
%view_523 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_30, [1, 1, 32, 128]), kwargs = {})
%stack_31 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_31, %add_107], -1), kwargs = {})
%view_524 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_31, [1, 1, 32, 128]), kwargs = {})
%transpose_75 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_523, 1, 2), kwargs = {})
%transpose_76 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_524, 1, 2), kwargs = {})
%transpose_77 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_518, 1, 2), kwargs = {})
%slice_151 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg437_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_152 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_151, 1, 0, 9223372036854775807), kwargs = {})
%view_525 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_76, [32, 1, 128]), kwargs = {})
%index_put_30 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_152, [None, None, %arg840_1], %view_525), kwargs = {})
%slice_153 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg437_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_60 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_153, %index_put_30, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_61 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg437_1, %slice_scatter_60, 0, 0, 9223372036854775807), kwargs = {})
%slice_156 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg438_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_157 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_156, 1, 0, 9223372036854775807), kwargs = {})
%view_526 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_77, [32, 1, 128]), kwargs = {})
%index_put_31 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_157, [None, None, %arg840_1], %view_526), kwargs = {})
%slice_158 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg438_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_62 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_158, %index_put_31, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_63 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg438_1, %slice_scatter_62, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_90 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg439_1, 0), kwargs = {})
%unsqueeze_91 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_90, 1), kwargs = {})
%index_17 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_91, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_93 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_61, 2), kwargs = {})
%expand_121 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_93, [1, 32, 1, 128, 128]), kwargs = {})
%clone_30 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_121,), kwargs = {memory_format: torch.contiguous_format})
%view_527 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_30, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_95 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_63, 2), kwargs = {})
%expand_123 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_95, [1, 32, 1, 128, 128]), kwargs = {})
%clone_31 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_123,), kwargs = {memory_format: torch.contiguous_format})
%view_528 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_31, [1, 32, 128, 128]), kwargs = {})
%zeros_like_15 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_17,), kwargs = {dtype: torch.float32})
%logical_not_15 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_17,), kwargs = {})
%masked_fill_15 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_15, %logical_not_15, -inf), kwargs = {})
%mul_266 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_75, 0.29730177875068026), kwargs = {})
%transpose_78 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_527, -2, -1), kwargs = {})
%mul_267 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_78, 0.29730177875068026), kwargs = {})
%expand_124 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_266, [1, 32, 1, 128]), kwargs = {})
%view_529 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_124, [32, 1, 128]), kwargs = {})
%expand_125 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_267, [1, 32, 128, 128]), kwargs = {})
%view_530 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_125, [32, 128, 128]), kwargs = {})
%bmm_30 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_529, %view_530), kwargs = {})
%view_531 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_30, [1, 32, 1, 128]), kwargs = {})
%add_108 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_531, %masked_fill_15), kwargs = {})
%_softmax_15 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_108, -1, False), kwargs = {})
%expand_126 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_15, [1, 32, 1, 128]), kwargs = {})
%view_532 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_126, [32, 1, 128]), kwargs = {})
%expand_127 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_528, [1, 32, 128, 128]), kwargs = {})
%view_533 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_127, [32, 128, 128]), kwargs = {})
%bmm_31 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_532, %view_533), kwargs = {})
%view_534 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_31, [1, 32, 1, 128]), kwargs = {})
%transpose_79 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_534, 1, 2), kwargs = {})
%view_535 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_79, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_108 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_535, torch.int8), kwargs = {})
%getitem_280 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_108, 0), kwargs = {})
%getitem_281 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_108, 1), kwargs = {})
%quantize_per_token_108 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_535, %getitem_280, %getitem_281, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_108 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_108, %getitem_280, %getitem_281, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_108 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg440_1, %arg441_1, %arg442_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_108 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_108,), kwargs = {})
%view_536 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_108, [1, 4096]), kwargs = {})
%mm_108 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_536, %t_108), kwargs = {})
%view_537 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_108, [1, 1, 4096]), kwargs = {})
%add_109 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_104, %view_537), kwargs = {})
%mul_268 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_109, %add_109), kwargs = {})
%mean_31 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_268, [-1], True), kwargs = {})
%add_110 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_31, 1e-06), kwargs = {})
%rsqrt_31 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_110,), kwargs = {})
%mul_269 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_109, %rsqrt_31), kwargs = {})
%mul_270 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_269, %arg32_1), kwargs = {})
%choose_qparams_per_token_asymmetric_109 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_270, torch.int8), kwargs = {})
%getitem_282 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_109, 0), kwargs = {})
%getitem_283 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_109, 1), kwargs = {})
%quantize_per_token_109 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_270, %getitem_282, %getitem_283, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_109 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_109, %getitem_282, %getitem_283, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_109 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg443_1, %arg444_1, %arg445_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_109 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_109,), kwargs = {})
%view_538 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_109, [1, 4096]), kwargs = {})
%mm_109 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_538, %t_109), kwargs = {})
%view_539 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_109, [1, 1, 11008]), kwargs = {})
%silu_15 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_539,), kwargs = {})
%choose_qparams_per_token_asymmetric_110 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_270, torch.int8), kwargs = {})
%getitem_284 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_110, 0), kwargs = {})
%getitem_285 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_110, 1), kwargs = {})
%quantize_per_token_110 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_270, %getitem_284, %getitem_285, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_110 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_110, %getitem_284, %getitem_285, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_110 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg446_1, %arg447_1, %arg448_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_110 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_110,), kwargs = {})
%view_540 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_110, [1, 4096]), kwargs = {})
%mm_110 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_540, %t_110), kwargs = {})
%view_541 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_110, [1, 1, 11008]), kwargs = {})
%mul_271 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_15, %view_541), kwargs = {})
%choose_qparams_per_token_asymmetric_111 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_271, torch.int8), kwargs = {})
%getitem_286 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_111, 0), kwargs = {})
%getitem_287 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_111, 1), kwargs = {})
%quantize_per_token_111 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_271, %getitem_286, %getitem_287, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_111 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_111, %getitem_286, %getitem_287, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_111 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg449_1, %arg450_1, %arg451_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_111 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_111,), kwargs = {})
%view_542 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_111, [1, 11008]), kwargs = {})
%mm_111 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_542, %t_111), kwargs = {})
%view_543 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_111, [1, 1, 4096]), kwargs = {})
%add_111 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_109, %view_543), kwargs = {})
%mul_272 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_111, %add_111), kwargs = {})
%mean_32 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_272, [-1], True), kwargs = {})
%add_112 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_32, 1e-06), kwargs = {})
%rsqrt_32 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_112,), kwargs = {})
%mul_273 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_111, %rsqrt_32), kwargs = {})
%mul_274 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_273, %arg33_1), kwargs = {})
%choose_qparams_per_token_asymmetric_112 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_274, torch.int8), kwargs = {})
%getitem_288 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_112, 0), kwargs = {})
%getitem_289 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_112, 1), kwargs = {})
%quantize_per_token_112 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_274, %getitem_288, %getitem_289, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_112 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_112, %getitem_288, %getitem_289, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_112 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg452_1, %arg453_1, %arg454_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_112 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_112,), kwargs = {})
%view_544 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_112, [1, 4096]), kwargs = {})
%mm_112 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_544, %t_112), kwargs = {})
%view_545 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_112, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_113 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_274, torch.int8), kwargs = {})
%getitem_290 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_113, 0), kwargs = {})
%getitem_291 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_113, 1), kwargs = {})
%quantize_per_token_113 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_274, %getitem_290, %getitem_291, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_113 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_113, %getitem_290, %getitem_291, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_113 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg455_1, %arg456_1, %arg457_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_113 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_113,), kwargs = {})
%view_546 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_113, [1, 4096]), kwargs = {})
%mm_113 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_546, %t_113), kwargs = {})
%view_547 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_113, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_114 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_274, torch.int8), kwargs = {})
%getitem_292 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_114, 0), kwargs = {})
%getitem_293 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_114, 1), kwargs = {})
%quantize_per_token_114 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_274, %getitem_292, %getitem_293, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_114 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_114, %getitem_292, %getitem_293, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_114 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg458_1, %arg459_1, %arg460_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_114 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_114,), kwargs = {})
%view_548 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_114, [1, 4096]), kwargs = {})
%mm_114 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_548, %t_114), kwargs = {})
%view_549 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_114, [1, 1, 4096]), kwargs = {})
%view_550 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_545, [1, 1, 32, 128]), kwargs = {})
%view_551 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_547, [1, 1, 32, 128]), kwargs = {})
%view_552 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_549, [1, 1, 32, 128]), kwargs = {})
%view_553 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_550, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_32 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_553, -1), kwargs = {})
%getitem_294 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_32, 0), kwargs = {})
%getitem_295 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_32, 1), kwargs = {})
%view_554 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_551, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_33 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_554, -1), kwargs = {})
%getitem_296 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_33, 0), kwargs = {})
%getitem_297 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_33, 1), kwargs = {})
%view_555 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_556 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_275 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_294, %view_555), kwargs = {})
%mul_276 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_295, %view_556), kwargs = {})
%sub_32 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_275, %mul_276), kwargs = {})
%mul_277 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_294, %view_556), kwargs = {})
%mul_278 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_295, %view_555), kwargs = {})
%add_113 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_277, %mul_278), kwargs = {})
%mul_279 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_296, %view_555), kwargs = {})
%mul_280 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_297, %view_556), kwargs = {})
%sub_33 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_279, %mul_280), kwargs = {})
%mul_281 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_296, %view_556), kwargs = {})
%mul_282 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_297, %view_555), kwargs = {})
%add_114 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_281, %mul_282), kwargs = {})
%stack_32 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_32, %add_113], -1), kwargs = {})
%view_557 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_32, [1, 1, 32, 128]), kwargs = {})
%stack_33 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_33, %add_114], -1), kwargs = {})
%view_558 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_33, [1, 1, 32, 128]), kwargs = {})
%transpose_80 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_557, 1, 2), kwargs = {})
%transpose_81 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_558, 1, 2), kwargs = {})
%transpose_82 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_552, 1, 2), kwargs = {})
%slice_161 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg461_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_162 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_161, 1, 0, 9223372036854775807), kwargs = {})
%view_559 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_81, [32, 1, 128]), kwargs = {})
%index_put_32 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_162, [None, None, %arg840_1], %view_559), kwargs = {})
%slice_163 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg461_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_64 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_163, %index_put_32, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_65 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg461_1, %slice_scatter_64, 0, 0, 9223372036854775807), kwargs = {})
%slice_166 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg462_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_167 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_166, 1, 0, 9223372036854775807), kwargs = {})
%view_560 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_82, [32, 1, 128]), kwargs = {})
%index_put_33 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_167, [None, None, %arg840_1], %view_560), kwargs = {})
%slice_168 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg462_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_66 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_168, %index_put_33, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_67 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg462_1, %slice_scatter_66, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_96 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg463_1, 0), kwargs = {})
%unsqueeze_97 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_96, 1), kwargs = {})
%index_18 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_97, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_99 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_65, 2), kwargs = {})
%expand_129 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_99, [1, 32, 1, 128, 128]), kwargs = {})
%clone_32 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_129,), kwargs = {memory_format: torch.contiguous_format})
%view_561 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_32, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_101 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_67, 2), kwargs = {})
%expand_131 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_101, [1, 32, 1, 128, 128]), kwargs = {})
%clone_33 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_131,), kwargs = {memory_format: torch.contiguous_format})
%view_562 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_33, [1, 32, 128, 128]), kwargs = {})
%zeros_like_16 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_18,), kwargs = {dtype: torch.float32})
%logical_not_16 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_18,), kwargs = {})
%masked_fill_16 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_16, %logical_not_16, -inf), kwargs = {})
%mul_283 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_80, 0.29730177875068026), kwargs = {})
%transpose_83 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_561, -2, -1), kwargs = {})
%mul_284 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_83, 0.29730177875068026), kwargs = {})
%expand_132 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_283, [1, 32, 1, 128]), kwargs = {})
%view_563 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_132, [32, 1, 128]), kwargs = {})
%expand_133 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_284, [1, 32, 128, 128]), kwargs = {})
%view_564 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_133, [32, 128, 128]), kwargs = {})
%bmm_32 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_563, %view_564), kwargs = {})
%view_565 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_32, [1, 32, 1, 128]), kwargs = {})
%add_115 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_565, %masked_fill_16), kwargs = {})
%_softmax_16 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_115, -1, False), kwargs = {})
%expand_134 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_16, [1, 32, 1, 128]), kwargs = {})
%view_566 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_134, [32, 1, 128]), kwargs = {})
%expand_135 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_562, [1, 32, 128, 128]), kwargs = {})
%view_567 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_135, [32, 128, 128]), kwargs = {})
%bmm_33 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_566, %view_567), kwargs = {})
%view_568 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_33, [1, 32, 1, 128]), kwargs = {})
%transpose_84 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_568, 1, 2), kwargs = {})
%view_569 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_84, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_115 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_569, torch.int8), kwargs = {})
%getitem_298 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_115, 0), kwargs = {})
%getitem_299 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_115, 1), kwargs = {})
%quantize_per_token_115 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_569, %getitem_298, %getitem_299, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_115 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_115, %getitem_298, %getitem_299, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_115 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg464_1, %arg465_1, %arg466_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_115 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_115,), kwargs = {})
%view_570 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_115, [1, 4096]), kwargs = {})
%mm_115 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_570, %t_115), kwargs = {})
%view_571 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_115, [1, 1, 4096]), kwargs = {})
%add_116 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_111, %view_571), kwargs = {})
%mul_285 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_116, %add_116), kwargs = {})
%mean_33 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_285, [-1], True), kwargs = {})
%add_117 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_33, 1e-06), kwargs = {})
%rsqrt_33 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_117,), kwargs = {})
%mul_286 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_116, %rsqrt_33), kwargs = {})
%mul_287 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_286, %arg34_1), kwargs = {})
%choose_qparams_per_token_asymmetric_116 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_287, torch.int8), kwargs = {})
%getitem_300 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_116, 0), kwargs = {})
%getitem_301 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_116, 1), kwargs = {})
%quantize_per_token_116 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_287, %getitem_300, %getitem_301, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_116 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_116, %getitem_300, %getitem_301, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_116 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg467_1, %arg468_1, %arg469_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_116 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_116,), kwargs = {})
%view_572 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_116, [1, 4096]), kwargs = {})
%mm_116 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_572, %t_116), kwargs = {})
%view_573 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_116, [1, 1, 11008]), kwargs = {})
%silu_16 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_573,), kwargs = {})
%choose_qparams_per_token_asymmetric_117 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_287, torch.int8), kwargs = {})
%getitem_302 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_117, 0), kwargs = {})
%getitem_303 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_117, 1), kwargs = {})
%quantize_per_token_117 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_287, %getitem_302, %getitem_303, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_117 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_117, %getitem_302, %getitem_303, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_117 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg470_1, %arg471_1, %arg472_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_117 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_117,), kwargs = {})
%view_574 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_117, [1, 4096]), kwargs = {})
%mm_117 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_574, %t_117), kwargs = {})
%view_575 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_117, [1, 1, 11008]), kwargs = {})
%mul_288 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_16, %view_575), kwargs = {})
%choose_qparams_per_token_asymmetric_118 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_288, torch.int8), kwargs = {})
%getitem_304 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_118, 0), kwargs = {})
%getitem_305 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_118, 1), kwargs = {})
%quantize_per_token_118 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_288, %getitem_304, %getitem_305, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_118 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_118, %getitem_304, %getitem_305, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_118 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg473_1, %arg474_1, %arg475_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_118 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_118,), kwargs = {})
%view_576 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_118, [1, 11008]), kwargs = {})
%mm_118 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_576, %t_118), kwargs = {})
%view_577 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_118, [1, 1, 4096]), kwargs = {})
%add_118 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_116, %view_577), kwargs = {})
%mul_289 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_118, %add_118), kwargs = {})
%mean_34 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_289, [-1], True), kwargs = {})
%add_119 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_34, 1e-06), kwargs = {})
%rsqrt_34 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_119,), kwargs = {})
%mul_290 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_118, %rsqrt_34), kwargs = {})
%mul_291 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_290, %arg35_1), kwargs = {})
%choose_qparams_per_token_asymmetric_119 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_291, torch.int8), kwargs = {})
%getitem_306 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_119, 0), kwargs = {})
%getitem_307 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_119, 1), kwargs = {})
%quantize_per_token_119 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_291, %getitem_306, %getitem_307, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_119 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_119, %getitem_306, %getitem_307, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_119 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg476_1, %arg477_1, %arg478_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_119 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_119,), kwargs = {})
%view_578 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_119, [1, 4096]), kwargs = {})
%mm_119 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_578, %t_119), kwargs = {})
%view_579 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_119, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_120 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_291, torch.int8), kwargs = {})
%getitem_308 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_120, 0), kwargs = {})
%getitem_309 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_120, 1), kwargs = {})
%quantize_per_token_120 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_291, %getitem_308, %getitem_309, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_120 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_120, %getitem_308, %getitem_309, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_120 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg479_1, %arg480_1, %arg481_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_120 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_120,), kwargs = {})
%view_580 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_120, [1, 4096]), kwargs = {})
%mm_120 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_580, %t_120), kwargs = {})
%view_581 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_120, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_121 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_291, torch.int8), kwargs = {})
%getitem_310 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_121, 0), kwargs = {})
%getitem_311 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_121, 1), kwargs = {})
%quantize_per_token_121 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_291, %getitem_310, %getitem_311, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_121 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_121, %getitem_310, %getitem_311, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_121 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg482_1, %arg483_1, %arg484_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_121 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_121,), kwargs = {})
%view_582 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_121, [1, 4096]), kwargs = {})
%mm_121 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_582, %t_121), kwargs = {})
%view_583 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_121, [1, 1, 4096]), kwargs = {})
%view_584 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_579, [1, 1, 32, 128]), kwargs = {})
%view_585 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_581, [1, 1, 32, 128]), kwargs = {})
%view_586 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_583, [1, 1, 32, 128]), kwargs = {})
%view_587 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_584, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_34 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_587, -1), kwargs = {})
%getitem_312 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_34, 0), kwargs = {})
%getitem_313 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_34, 1), kwargs = {})
%view_588 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_585, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_35 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_588, -1), kwargs = {})
%getitem_314 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_35, 0), kwargs = {})
%getitem_315 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_35, 1), kwargs = {})
%view_589 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_590 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_292 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_312, %view_589), kwargs = {})
%mul_293 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_313, %view_590), kwargs = {})
%sub_34 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_292, %mul_293), kwargs = {})
%mul_294 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_312, %view_590), kwargs = {})
%mul_295 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_313, %view_589), kwargs = {})
%add_120 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_294, %mul_295), kwargs = {})
%mul_296 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_314, %view_589), kwargs = {})
%mul_297 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_315, %view_590), kwargs = {})
%sub_35 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_296, %mul_297), kwargs = {})
%mul_298 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_314, %view_590), kwargs = {})
%mul_299 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_315, %view_589), kwargs = {})
%add_121 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_298, %mul_299), kwargs = {})
%stack_34 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_34, %add_120], -1), kwargs = {})
%view_591 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_34, [1, 1, 32, 128]), kwargs = {})
%stack_35 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_35, %add_121], -1), kwargs = {})
%view_592 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_35, [1, 1, 32, 128]), kwargs = {})
%transpose_85 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_591, 1, 2), kwargs = {})
%transpose_86 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_592, 1, 2), kwargs = {})
%transpose_87 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_586, 1, 2), kwargs = {})
%slice_171 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg485_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_172 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_171, 1, 0, 9223372036854775807), kwargs = {})
%view_593 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_86, [32, 1, 128]), kwargs = {})
%index_put_34 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_172, [None, None, %arg840_1], %view_593), kwargs = {})
%slice_173 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg485_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_68 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_173, %index_put_34, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_69 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg485_1, %slice_scatter_68, 0, 0, 9223372036854775807), kwargs = {})
%slice_176 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg486_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_177 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_176, 1, 0, 9223372036854775807), kwargs = {})
%view_594 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_87, [32, 1, 128]), kwargs = {})
%index_put_35 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_177, [None, None, %arg840_1], %view_594), kwargs = {})
%slice_178 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg486_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_70 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_178, %index_put_35, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_71 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg486_1, %slice_scatter_70, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_102 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg487_1, 0), kwargs = {})
%unsqueeze_103 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_102, 1), kwargs = {})
%index_19 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_103, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_105 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_69, 2), kwargs = {})
%expand_137 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_105, [1, 32, 1, 128, 128]), kwargs = {})
%clone_34 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_137,), kwargs = {memory_format: torch.contiguous_format})
%view_595 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_34, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_107 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_71, 2), kwargs = {})
%expand_139 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_107, [1, 32, 1, 128, 128]), kwargs = {})
%clone_35 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_139,), kwargs = {memory_format: torch.contiguous_format})
%view_596 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_35, [1, 32, 128, 128]), kwargs = {})
%zeros_like_17 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_19,), kwargs = {dtype: torch.float32})
%logical_not_17 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_19,), kwargs = {})
%masked_fill_17 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_17, %logical_not_17, -inf), kwargs = {})
%mul_300 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_85, 0.29730177875068026), kwargs = {})
%transpose_88 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_595, -2, -1), kwargs = {})
%mul_301 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_88, 0.29730177875068026), kwargs = {})
%expand_140 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_300, [1, 32, 1, 128]), kwargs = {})
%view_597 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_140, [32, 1, 128]), kwargs = {})
%expand_141 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_301, [1, 32, 128, 128]), kwargs = {})
%view_598 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_141, [32, 128, 128]), kwargs = {})
%bmm_34 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_597, %view_598), kwargs = {})
%view_599 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_34, [1, 32, 1, 128]), kwargs = {})
%add_122 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_599, %masked_fill_17), kwargs = {})
%_softmax_17 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_122, -1, False), kwargs = {})
%expand_142 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_17, [1, 32, 1, 128]), kwargs = {})
%view_600 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_142, [32, 1, 128]), kwargs = {})
%expand_143 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_596, [1, 32, 128, 128]), kwargs = {})
%view_601 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_143, [32, 128, 128]), kwargs = {})
%bmm_35 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_600, %view_601), kwargs = {})
%view_602 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_35, [1, 32, 1, 128]), kwargs = {})
%transpose_89 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_602, 1, 2), kwargs = {})
%view_603 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_89, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_122 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_603, torch.int8), kwargs = {})
%getitem_316 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_122, 0), kwargs = {})
%getitem_317 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_122, 1), kwargs = {})
%quantize_per_token_122 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_603, %getitem_316, %getitem_317, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_122 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_122, %getitem_316, %getitem_317, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_122 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg488_1, %arg489_1, %arg490_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_122 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_122,), kwargs = {})
%view_604 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_122, [1, 4096]), kwargs = {})
%mm_122 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_604, %t_122), kwargs = {})
%view_605 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_122, [1, 1, 4096]), kwargs = {})
%add_123 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_118, %view_605), kwargs = {})
%mul_302 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_123, %add_123), kwargs = {})
%mean_35 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_302, [-1], True), kwargs = {})
%add_124 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_35, 1e-06), kwargs = {})
%rsqrt_35 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_124,), kwargs = {})
%mul_303 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_123, %rsqrt_35), kwargs = {})
%mul_304 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_303, %arg36_1), kwargs = {})
%choose_qparams_per_token_asymmetric_123 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_304, torch.int8), kwargs = {})
%getitem_318 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_123, 0), kwargs = {})
%getitem_319 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_123, 1), kwargs = {})
%quantize_per_token_123 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_304, %getitem_318, %getitem_319, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_123 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_123, %getitem_318, %getitem_319, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_123 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg491_1, %arg492_1, %arg493_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_123 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_123,), kwargs = {})
%view_606 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_123, [1, 4096]), kwargs = {})
%mm_123 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_606, %t_123), kwargs = {})
%view_607 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_123, [1, 1, 11008]), kwargs = {})
%silu_17 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_607,), kwargs = {})
%choose_qparams_per_token_asymmetric_124 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_304, torch.int8), kwargs = {})
%getitem_320 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_124, 0), kwargs = {})
%getitem_321 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_124, 1), kwargs = {})
%quantize_per_token_124 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_304, %getitem_320, %getitem_321, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_124 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_124, %getitem_320, %getitem_321, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_124 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg494_1, %arg495_1, %arg496_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_124 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_124,), kwargs = {})
%view_608 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_124, [1, 4096]), kwargs = {})
%mm_124 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_608, %t_124), kwargs = {})
%view_609 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_124, [1, 1, 11008]), kwargs = {})
%mul_305 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_17, %view_609), kwargs = {})
%choose_qparams_per_token_asymmetric_125 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_305, torch.int8), kwargs = {})
%getitem_322 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_125, 0), kwargs = {})
%getitem_323 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_125, 1), kwargs = {})
%quantize_per_token_125 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_305, %getitem_322, %getitem_323, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_125 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_125, %getitem_322, %getitem_323, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_125 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg497_1, %arg498_1, %arg499_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_125 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_125,), kwargs = {})
%view_610 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_125, [1, 11008]), kwargs = {})
%mm_125 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_610, %t_125), kwargs = {})
%view_611 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_125, [1, 1, 4096]), kwargs = {})
%add_125 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_123, %view_611), kwargs = {})
%mul_306 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_125, %add_125), kwargs = {})
%mean_36 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_306, [-1], True), kwargs = {})
%add_126 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_36, 1e-06), kwargs = {})
%rsqrt_36 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_126,), kwargs = {})
%mul_307 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_125, %rsqrt_36), kwargs = {})
%mul_308 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_307, %arg37_1), kwargs = {})
%choose_qparams_per_token_asymmetric_126 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_308, torch.int8), kwargs = {})
%getitem_324 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_126, 0), kwargs = {})
%getitem_325 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_126, 1), kwargs = {})
%quantize_per_token_126 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_308, %getitem_324, %getitem_325, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_126 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_126, %getitem_324, %getitem_325, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_126 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg500_1, %arg501_1, %arg502_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_126 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_126,), kwargs = {})
%view_612 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_126, [1, 4096]), kwargs = {})
%mm_126 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_612, %t_126), kwargs = {})
%view_613 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_126, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_127 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_308, torch.int8), kwargs = {})
%getitem_326 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_127, 0), kwargs = {})
%getitem_327 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_127, 1), kwargs = {})
%quantize_per_token_127 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_308, %getitem_326, %getitem_327, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_127 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_127, %getitem_326, %getitem_327, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_127 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg503_1, %arg504_1, %arg505_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_127 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_127,), kwargs = {})
%view_614 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_127, [1, 4096]), kwargs = {})
%mm_127 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_614, %t_127), kwargs = {})
%view_615 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_127, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_128 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_308, torch.int8), kwargs = {})
%getitem_328 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_128, 0), kwargs = {})
%getitem_329 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_128, 1), kwargs = {})
%quantize_per_token_128 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_308, %getitem_328, %getitem_329, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_128 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_128, %getitem_328, %getitem_329, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_128 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg506_1, %arg507_1, %arg508_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_128 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_128,), kwargs = {})
%view_616 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_128, [1, 4096]), kwargs = {})
%mm_128 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_616, %t_128), kwargs = {})
%view_617 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_128, [1, 1, 4096]), kwargs = {})
%view_618 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_613, [1, 1, 32, 128]), kwargs = {})
%view_619 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_615, [1, 1, 32, 128]), kwargs = {})
%view_620 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_617, [1, 1, 32, 128]), kwargs = {})
%view_621 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_618, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_36 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_621, -1), kwargs = {})
%getitem_330 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_36, 0), kwargs = {})
%getitem_331 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_36, 1), kwargs = {})
%view_622 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_619, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_37 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_622, -1), kwargs = {})
%getitem_332 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_37, 0), kwargs = {})
%getitem_333 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_37, 1), kwargs = {})
%view_623 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_624 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_309 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_330, %view_623), kwargs = {})
%mul_310 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_331, %view_624), kwargs = {})
%sub_36 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_309, %mul_310), kwargs = {})
%mul_311 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_330, %view_624), kwargs = {})
%mul_312 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_331, %view_623), kwargs = {})
%add_127 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_311, %mul_312), kwargs = {})
%mul_313 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_332, %view_623), kwargs = {})
%mul_314 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_333, %view_624), kwargs = {})
%sub_37 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_313, %mul_314), kwargs = {})
%mul_315 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_332, %view_624), kwargs = {})
%mul_316 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_333, %view_623), kwargs = {})
%add_128 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_315, %mul_316), kwargs = {})
%stack_36 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_36, %add_127], -1), kwargs = {})
%view_625 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_36, [1, 1, 32, 128]), kwargs = {})
%stack_37 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_37, %add_128], -1), kwargs = {})
%view_626 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_37, [1, 1, 32, 128]), kwargs = {})
%transpose_90 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_625, 1, 2), kwargs = {})
%transpose_91 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_626, 1, 2), kwargs = {})
%transpose_92 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_620, 1, 2), kwargs = {})
%slice_181 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg509_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_182 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_181, 1, 0, 9223372036854775807), kwargs = {})
%view_627 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_91, [32, 1, 128]), kwargs = {})
%index_put_36 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_182, [None, None, %arg840_1], %view_627), kwargs = {})
%slice_183 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg509_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_72 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_183, %index_put_36, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_73 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg509_1, %slice_scatter_72, 0, 0, 9223372036854775807), kwargs = {})
%slice_186 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg510_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_187 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_186, 1, 0, 9223372036854775807), kwargs = {})
%view_628 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_92, [32, 1, 128]), kwargs = {})
%index_put_37 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_187, [None, None, %arg840_1], %view_628), kwargs = {})
%slice_188 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg510_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_74 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_188, %index_put_37, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_75 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg510_1, %slice_scatter_74, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_108 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg511_1, 0), kwargs = {})
%unsqueeze_109 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_108, 1), kwargs = {})
%index_20 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_109, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_111 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_73, 2), kwargs = {})
%expand_145 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_111, [1, 32, 1, 128, 128]), kwargs = {})
%clone_36 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_145,), kwargs = {memory_format: torch.contiguous_format})
%view_629 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_36, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_113 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_75, 2), kwargs = {})
%expand_147 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_113, [1, 32, 1, 128, 128]), kwargs = {})
%clone_37 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_147,), kwargs = {memory_format: torch.contiguous_format})
%view_630 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_37, [1, 32, 128, 128]), kwargs = {})
%zeros_like_18 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_20,), kwargs = {dtype: torch.float32})
%logical_not_18 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_20,), kwargs = {})
%masked_fill_18 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_18, %logical_not_18, -inf), kwargs = {})
%mul_317 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_90, 0.29730177875068026), kwargs = {})
%transpose_93 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_629, -2, -1), kwargs = {})
%mul_318 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_93, 0.29730177875068026), kwargs = {})
%expand_148 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_317, [1, 32, 1, 128]), kwargs = {})
%view_631 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_148, [32, 1, 128]), kwargs = {})
%expand_149 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_318, [1, 32, 128, 128]), kwargs = {})
%view_632 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_149, [32, 128, 128]), kwargs = {})
%bmm_36 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_631, %view_632), kwargs = {})
%view_633 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_36, [1, 32, 1, 128]), kwargs = {})
%add_129 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_633, %masked_fill_18), kwargs = {})
%_softmax_18 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_129, -1, False), kwargs = {})
%expand_150 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_18, [1, 32, 1, 128]), kwargs = {})
%view_634 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_150, [32, 1, 128]), kwargs = {})
%expand_151 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_630, [1, 32, 128, 128]), kwargs = {})
%view_635 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_151, [32, 128, 128]), kwargs = {})
%bmm_37 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_634, %view_635), kwargs = {})
%view_636 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_37, [1, 32, 1, 128]), kwargs = {})
%transpose_94 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_636, 1, 2), kwargs = {})
%view_637 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_94, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_129 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_637, torch.int8), kwargs = {})
%getitem_334 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_129, 0), kwargs = {})
%getitem_335 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_129, 1), kwargs = {})
%quantize_per_token_129 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_637, %getitem_334, %getitem_335, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_129 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_129, %getitem_334, %getitem_335, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_129 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg512_1, %arg513_1, %arg514_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_129 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_129,), kwargs = {})
%view_638 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_129, [1, 4096]), kwargs = {})
%mm_129 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_638, %t_129), kwargs = {})
%view_639 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_129, [1, 1, 4096]), kwargs = {})
%add_130 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_125, %view_639), kwargs = {})
%mul_319 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_130, %add_130), kwargs = {})
%mean_37 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_319, [-1], True), kwargs = {})
%add_131 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_37, 1e-06), kwargs = {})
%rsqrt_37 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_131,), kwargs = {})
%mul_320 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_130, %rsqrt_37), kwargs = {})
%mul_321 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_320, %arg38_1), kwargs = {})
%choose_qparams_per_token_asymmetric_130 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_321, torch.int8), kwargs = {})
%getitem_336 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_130, 0), kwargs = {})
%getitem_337 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_130, 1), kwargs = {})
%quantize_per_token_130 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_321, %getitem_336, %getitem_337, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_130 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_130, %getitem_336, %getitem_337, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_130 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg515_1, %arg516_1, %arg517_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_130 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_130,), kwargs = {})
%view_640 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_130, [1, 4096]), kwargs = {})
%mm_130 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_640, %t_130), kwargs = {})
%view_641 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_130, [1, 1, 11008]), kwargs = {})
%silu_18 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_641,), kwargs = {})
%choose_qparams_per_token_asymmetric_131 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_321, torch.int8), kwargs = {})
%getitem_338 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_131, 0), kwargs = {})
%getitem_339 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_131, 1), kwargs = {})
%quantize_per_token_131 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_321, %getitem_338, %getitem_339, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_131 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_131, %getitem_338, %getitem_339, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_131 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg518_1, %arg519_1, %arg520_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_131 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_131,), kwargs = {})
%view_642 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_131, [1, 4096]), kwargs = {})
%mm_131 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_642, %t_131), kwargs = {})
%view_643 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_131, [1, 1, 11008]), kwargs = {})
%mul_322 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_18, %view_643), kwargs = {})
%choose_qparams_per_token_asymmetric_132 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_322, torch.int8), kwargs = {})
%getitem_340 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_132, 0), kwargs = {})
%getitem_341 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_132, 1), kwargs = {})
%quantize_per_token_132 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_322, %getitem_340, %getitem_341, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_132 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_132, %getitem_340, %getitem_341, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_132 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg521_1, %arg522_1, %arg523_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_132 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_132,), kwargs = {})
%view_644 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_132, [1, 11008]), kwargs = {})
%mm_132 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_644, %t_132), kwargs = {})
%view_645 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_132, [1, 1, 4096]), kwargs = {})
%add_132 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_130, %view_645), kwargs = {})
%mul_323 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_132, %add_132), kwargs = {})
%mean_38 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_323, [-1], True), kwargs = {})
%add_133 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_38, 1e-06), kwargs = {})
%rsqrt_38 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_133,), kwargs = {})
%mul_324 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_132, %rsqrt_38), kwargs = {})
%mul_325 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_324, %arg39_1), kwargs = {})
%choose_qparams_per_token_asymmetric_133 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_325, torch.int8), kwargs = {})
%getitem_342 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_133, 0), kwargs = {})
%getitem_343 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_133, 1), kwargs = {})
%quantize_per_token_133 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_325, %getitem_342, %getitem_343, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_133 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_133, %getitem_342, %getitem_343, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_133 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg524_1, %arg525_1, %arg526_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_133 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_133,), kwargs = {})
%view_646 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_133, [1, 4096]), kwargs = {})
%mm_133 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_646, %t_133), kwargs = {})
%view_647 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_133, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_134 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_325, torch.int8), kwargs = {})
%getitem_344 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_134, 0), kwargs = {})
%getitem_345 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_134, 1), kwargs = {})
%quantize_per_token_134 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_325, %getitem_344, %getitem_345, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_134 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_134, %getitem_344, %getitem_345, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_134 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg527_1, %arg528_1, %arg529_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_134 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_134,), kwargs = {})
%view_648 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_134, [1, 4096]), kwargs = {})
%mm_134 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_648, %t_134), kwargs = {})
%view_649 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_134, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_135 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_325, torch.int8), kwargs = {})
%getitem_346 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_135, 0), kwargs = {})
%getitem_347 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_135, 1), kwargs = {})
%quantize_per_token_135 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_325, %getitem_346, %getitem_347, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_135 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_135, %getitem_346, %getitem_347, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_135 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg530_1, %arg531_1, %arg532_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_135 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_135,), kwargs = {})
%view_650 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_135, [1, 4096]), kwargs = {})
%mm_135 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_650, %t_135), kwargs = {})
%view_651 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_135, [1, 1, 4096]), kwargs = {})
%view_652 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_647, [1, 1, 32, 128]), kwargs = {})
%view_653 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_649, [1, 1, 32, 128]), kwargs = {})
%view_654 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_651, [1, 1, 32, 128]), kwargs = {})
%view_655 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_652, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_38 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_655, -1), kwargs = {})
%getitem_348 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_38, 0), kwargs = {})
%getitem_349 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_38, 1), kwargs = {})
%view_656 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_653, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_39 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_656, -1), kwargs = {})
%getitem_350 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_39, 0), kwargs = {})
%getitem_351 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_39, 1), kwargs = {})
%view_657 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_658 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_326 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_348, %view_657), kwargs = {})
%mul_327 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_349, %view_658), kwargs = {})
%sub_38 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_326, %mul_327), kwargs = {})
%mul_328 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_348, %view_658), kwargs = {})
%mul_329 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_349, %view_657), kwargs = {})
%add_134 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_328, %mul_329), kwargs = {})
%mul_330 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_350, %view_657), kwargs = {})
%mul_331 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_351, %view_658), kwargs = {})
%sub_39 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_330, %mul_331), kwargs = {})
%mul_332 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_350, %view_658), kwargs = {})
%mul_333 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_351, %view_657), kwargs = {})
%add_135 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_332, %mul_333), kwargs = {})
%stack_38 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_38, %add_134], -1), kwargs = {})
%view_659 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_38, [1, 1, 32, 128]), kwargs = {})
%stack_39 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_39, %add_135], -1), kwargs = {})
%view_660 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_39, [1, 1, 32, 128]), kwargs = {})
%transpose_95 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_659, 1, 2), kwargs = {})
%transpose_96 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_660, 1, 2), kwargs = {})
%transpose_97 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_654, 1, 2), kwargs = {})
%slice_191 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg533_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_192 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_191, 1, 0, 9223372036854775807), kwargs = {})
%view_661 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_96, [32, 1, 128]), kwargs = {})
%index_put_38 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_192, [None, None, %arg840_1], %view_661), kwargs = {})
%slice_193 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg533_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_76 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_193, %index_put_38, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_77 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg533_1, %slice_scatter_76, 0, 0, 9223372036854775807), kwargs = {})
%slice_196 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg534_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_197 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_196, 1, 0, 9223372036854775807), kwargs = {})
%view_662 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_97, [32, 1, 128]), kwargs = {})
%index_put_39 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_197, [None, None, %arg840_1], %view_662), kwargs = {})
%slice_198 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg534_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_78 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_198, %index_put_39, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_79 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg534_1, %slice_scatter_78, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_114 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg535_1, 0), kwargs = {})
%unsqueeze_115 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_114, 1), kwargs = {})
%index_21 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_115, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_117 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_77, 2), kwargs = {})
%expand_153 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_117, [1, 32, 1, 128, 128]), kwargs = {})
%clone_38 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_153,), kwargs = {memory_format: torch.contiguous_format})
%view_663 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_38, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_119 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_79, 2), kwargs = {})
%expand_155 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_119, [1, 32, 1, 128, 128]), kwargs = {})
%clone_39 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_155,), kwargs = {memory_format: torch.contiguous_format})
%view_664 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_39, [1, 32, 128, 128]), kwargs = {})
%zeros_like_19 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_21,), kwargs = {dtype: torch.float32})
%logical_not_19 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_21,), kwargs = {})
%masked_fill_19 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_19, %logical_not_19, -inf), kwargs = {})
%mul_334 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_95, 0.29730177875068026), kwargs = {})
%transpose_98 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_663, -2, -1), kwargs = {})
%mul_335 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_98, 0.29730177875068026), kwargs = {})
%expand_156 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_334, [1, 32, 1, 128]), kwargs = {})
%view_665 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_156, [32, 1, 128]), kwargs = {})
%expand_157 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_335, [1, 32, 128, 128]), kwargs = {})
%view_666 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_157, [32, 128, 128]), kwargs = {})
%bmm_38 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_665, %view_666), kwargs = {})
%view_667 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_38, [1, 32, 1, 128]), kwargs = {})
%add_136 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_667, %masked_fill_19), kwargs = {})
%_softmax_19 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_136, -1, False), kwargs = {})
%expand_158 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_19, [1, 32, 1, 128]), kwargs = {})
%view_668 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_158, [32, 1, 128]), kwargs = {})
%expand_159 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_664, [1, 32, 128, 128]), kwargs = {})
%view_669 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_159, [32, 128, 128]), kwargs = {})
%bmm_39 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_668, %view_669), kwargs = {})
%view_670 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_39, [1, 32, 1, 128]), kwargs = {})
%transpose_99 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_670, 1, 2), kwargs = {})
%view_671 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_99, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_136 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_671, torch.int8), kwargs = {})
%getitem_352 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_136, 0), kwargs = {})
%getitem_353 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_136, 1), kwargs = {})
%quantize_per_token_136 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_671, %getitem_352, %getitem_353, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_136 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_136, %getitem_352, %getitem_353, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_136 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg536_1, %arg537_1, %arg538_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_136 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_136,), kwargs = {})
%view_672 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_136, [1, 4096]), kwargs = {})
%mm_136 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_672, %t_136), kwargs = {})
%view_673 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_136, [1, 1, 4096]), kwargs = {})
%add_137 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_132, %view_673), kwargs = {})
%mul_336 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_137, %add_137), kwargs = {})
%mean_39 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_336, [-1], True), kwargs = {})
%add_138 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_39, 1e-06), kwargs = {})
%rsqrt_39 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_138,), kwargs = {})
%mul_337 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_137, %rsqrt_39), kwargs = {})
%mul_338 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_337, %arg40_1), kwargs = {})
%choose_qparams_per_token_asymmetric_137 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_338, torch.int8), kwargs = {})
%getitem_354 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_137, 0), kwargs = {})
%getitem_355 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_137, 1), kwargs = {})
%quantize_per_token_137 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_338, %getitem_354, %getitem_355, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_137 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_137, %getitem_354, %getitem_355, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_137 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg539_1, %arg540_1, %arg541_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_137 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_137,), kwargs = {})
%view_674 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_137, [1, 4096]), kwargs = {})
%mm_137 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_674, %t_137), kwargs = {})
%view_675 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_137, [1, 1, 11008]), kwargs = {})
%silu_19 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_675,), kwargs = {})
%choose_qparams_per_token_asymmetric_138 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_338, torch.int8), kwargs = {})
%getitem_356 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_138, 0), kwargs = {})
%getitem_357 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_138, 1), kwargs = {})
%quantize_per_token_138 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_338, %getitem_356, %getitem_357, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_138 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_138, %getitem_356, %getitem_357, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_138 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg542_1, %arg543_1, %arg544_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_138 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_138,), kwargs = {})
%view_676 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_138, [1, 4096]), kwargs = {})
%mm_138 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_676, %t_138), kwargs = {})
%view_677 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_138, [1, 1, 11008]), kwargs = {})
%mul_339 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_19, %view_677), kwargs = {})
%choose_qparams_per_token_asymmetric_139 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_339, torch.int8), kwargs = {})
%getitem_358 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_139, 0), kwargs = {})
%getitem_359 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_139, 1), kwargs = {})
%quantize_per_token_139 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_339, %getitem_358, %getitem_359, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_139 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_139, %getitem_358, %getitem_359, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_139 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg545_1, %arg546_1, %arg547_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_139 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_139,), kwargs = {})
%view_678 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_139, [1, 11008]), kwargs = {})
%mm_139 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_678, %t_139), kwargs = {})
%view_679 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_139, [1, 1, 4096]), kwargs = {})
%add_139 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_137, %view_679), kwargs = {})
%mul_340 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_139, %add_139), kwargs = {})
%mean_40 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_340, [-1], True), kwargs = {})
%add_140 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_40, 1e-06), kwargs = {})
%rsqrt_40 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_140,), kwargs = {})
%mul_341 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_139, %rsqrt_40), kwargs = {})
%mul_342 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_341, %arg41_1), kwargs = {})
%choose_qparams_per_token_asymmetric_140 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_342, torch.int8), kwargs = {})
%getitem_360 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_140, 0), kwargs = {})
%getitem_361 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_140, 1), kwargs = {})
%quantize_per_token_140 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_342, %getitem_360, %getitem_361, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_140 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_140, %getitem_360, %getitem_361, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_140 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg548_1, %arg549_1, %arg550_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_140 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_140,), kwargs = {})
%view_680 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_140, [1, 4096]), kwargs = {})
%mm_140 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_680, %t_140), kwargs = {})
%view_681 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_140, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_141 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_342, torch.int8), kwargs = {})
%getitem_362 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_141, 0), kwargs = {})
%getitem_363 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_141, 1), kwargs = {})
%quantize_per_token_141 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_342, %getitem_362, %getitem_363, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_141 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_141, %getitem_362, %getitem_363, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_141 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg551_1, %arg552_1, %arg553_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_141 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_141,), kwargs = {})
%view_682 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_141, [1, 4096]), kwargs = {})
%mm_141 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_682, %t_141), kwargs = {})
%view_683 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_141, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_142 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_342, torch.int8), kwargs = {})
%getitem_364 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_142, 0), kwargs = {})
%getitem_365 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_142, 1), kwargs = {})
%quantize_per_token_142 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_342, %getitem_364, %getitem_365, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_142 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_142, %getitem_364, %getitem_365, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_142 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg554_1, %arg555_1, %arg556_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_142 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_142,), kwargs = {})
%view_684 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_142, [1, 4096]), kwargs = {})
%mm_142 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_684, %t_142), kwargs = {})
%view_685 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_142, [1, 1, 4096]), kwargs = {})
%view_686 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_681, [1, 1, 32, 128]), kwargs = {})
%view_687 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_683, [1, 1, 32, 128]), kwargs = {})
%view_688 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_685, [1, 1, 32, 128]), kwargs = {})
%view_689 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_686, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_40 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_689, -1), kwargs = {})
%getitem_366 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_40, 0), kwargs = {})
%getitem_367 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_40, 1), kwargs = {})
%view_690 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_687, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_41 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_690, -1), kwargs = {})
%getitem_368 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_41, 0), kwargs = {})
%getitem_369 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_41, 1), kwargs = {})
%view_691 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_692 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_343 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_366, %view_691), kwargs = {})
%mul_344 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_367, %view_692), kwargs = {})
%sub_40 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_343, %mul_344), kwargs = {})
%mul_345 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_366, %view_692), kwargs = {})
%mul_346 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_367, %view_691), kwargs = {})
%add_141 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_345, %mul_346), kwargs = {})
%mul_347 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_368, %view_691), kwargs = {})
%mul_348 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_369, %view_692), kwargs = {})
%sub_41 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_347, %mul_348), kwargs = {})
%mul_349 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_368, %view_692), kwargs = {})
%mul_350 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_369, %view_691), kwargs = {})
%add_142 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_349, %mul_350), kwargs = {})
%stack_40 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_40, %add_141], -1), kwargs = {})
%view_693 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_40, [1, 1, 32, 128]), kwargs = {})
%stack_41 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_41, %add_142], -1), kwargs = {})
%view_694 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_41, [1, 1, 32, 128]), kwargs = {})
%transpose_100 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_693, 1, 2), kwargs = {})
%transpose_101 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_694, 1, 2), kwargs = {})
%transpose_102 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_688, 1, 2), kwargs = {})
%slice_201 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg557_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_202 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_201, 1, 0, 9223372036854775807), kwargs = {})
%view_695 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_101, [32, 1, 128]), kwargs = {})
%index_put_40 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_202, [None, None, %arg840_1], %view_695), kwargs = {})
%slice_203 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg557_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_80 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_203, %index_put_40, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_81 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg557_1, %slice_scatter_80, 0, 0, 9223372036854775807), kwargs = {})
%slice_206 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg558_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_207 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_206, 1, 0, 9223372036854775807), kwargs = {})
%view_696 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_102, [32, 1, 128]), kwargs = {})
%index_put_41 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_207, [None, None, %arg840_1], %view_696), kwargs = {})
%slice_208 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg558_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_82 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_208, %index_put_41, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_83 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg558_1, %slice_scatter_82, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_120 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg559_1, 0), kwargs = {})
%unsqueeze_121 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_120, 1), kwargs = {})
%index_22 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_121, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_123 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_81, 2), kwargs = {})
%expand_161 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_123, [1, 32, 1, 128, 128]), kwargs = {})
%clone_40 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_161,), kwargs = {memory_format: torch.contiguous_format})
%view_697 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_40, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_125 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_83, 2), kwargs = {})
%expand_163 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_125, [1, 32, 1, 128, 128]), kwargs = {})
%clone_41 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_163,), kwargs = {memory_format: torch.contiguous_format})
%view_698 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_41, [1, 32, 128, 128]), kwargs = {})
%zeros_like_20 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_22,), kwargs = {dtype: torch.float32})
%logical_not_20 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_22,), kwargs = {})
%masked_fill_20 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_20, %logical_not_20, -inf), kwargs = {})
%mul_351 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_100, 0.29730177875068026), kwargs = {})
%transpose_103 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_697, -2, -1), kwargs = {})
%mul_352 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_103, 0.29730177875068026), kwargs = {})
%expand_164 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_351, [1, 32, 1, 128]), kwargs = {})
%view_699 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_164, [32, 1, 128]), kwargs = {})
%expand_165 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_352, [1, 32, 128, 128]), kwargs = {})
%view_700 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_165, [32, 128, 128]), kwargs = {})
%bmm_40 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_699, %view_700), kwargs = {})
%view_701 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_40, [1, 32, 1, 128]), kwargs = {})
%add_143 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_701, %masked_fill_20), kwargs = {})
%_softmax_20 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_143, -1, False), kwargs = {})
%expand_166 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_20, [1, 32, 1, 128]), kwargs = {})
%view_702 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_166, [32, 1, 128]), kwargs = {})
%expand_167 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_698, [1, 32, 128, 128]), kwargs = {})
%view_703 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_167, [32, 128, 128]), kwargs = {})
%bmm_41 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_702, %view_703), kwargs = {})
%view_704 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_41, [1, 32, 1, 128]), kwargs = {})
%transpose_104 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_704, 1, 2), kwargs = {})
%view_705 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_104, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_143 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_705, torch.int8), kwargs = {})
%getitem_370 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_143, 0), kwargs = {})
%getitem_371 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_143, 1), kwargs = {})
%quantize_per_token_143 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_705, %getitem_370, %getitem_371, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_143 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_143, %getitem_370, %getitem_371, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_143 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg560_1, %arg561_1, %arg562_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_143 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_143,), kwargs = {})
%view_706 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_143, [1, 4096]), kwargs = {})
%mm_143 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_706, %t_143), kwargs = {})
%view_707 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_143, [1, 1, 4096]), kwargs = {})
%add_144 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_139, %view_707), kwargs = {})
%mul_353 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_144, %add_144), kwargs = {})
%mean_41 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_353, [-1], True), kwargs = {})
%add_145 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_41, 1e-06), kwargs = {})
%rsqrt_41 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_145,), kwargs = {})
%mul_354 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_144, %rsqrt_41), kwargs = {})
%mul_355 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_354, %arg42_1), kwargs = {})
%choose_qparams_per_token_asymmetric_144 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_355, torch.int8), kwargs = {})
%getitem_372 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_144, 0), kwargs = {})
%getitem_373 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_144, 1), kwargs = {})
%quantize_per_token_144 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_355, %getitem_372, %getitem_373, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_144 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_144, %getitem_372, %getitem_373, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_144 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg563_1, %arg564_1, %arg565_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_144 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_144,), kwargs = {})
%view_708 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_144, [1, 4096]), kwargs = {})
%mm_144 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_708, %t_144), kwargs = {})
%view_709 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_144, [1, 1, 11008]), kwargs = {})
%silu_20 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_709,), kwargs = {})
%choose_qparams_per_token_asymmetric_145 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_355, torch.int8), kwargs = {})
%getitem_374 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_145, 0), kwargs = {})
%getitem_375 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_145, 1), kwargs = {})
%quantize_per_token_145 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_355, %getitem_374, %getitem_375, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_145 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_145, %getitem_374, %getitem_375, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_145 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg566_1, %arg567_1, %arg568_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_145 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_145,), kwargs = {})
%view_710 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_145, [1, 4096]), kwargs = {})
%mm_145 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_710, %t_145), kwargs = {})
%view_711 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_145, [1, 1, 11008]), kwargs = {})
%mul_356 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_20, %view_711), kwargs = {})
%choose_qparams_per_token_asymmetric_146 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_356, torch.int8), kwargs = {})
%getitem_376 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_146, 0), kwargs = {})
%getitem_377 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_146, 1), kwargs = {})
%quantize_per_token_146 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_356, %getitem_376, %getitem_377, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_146 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_146, %getitem_376, %getitem_377, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_146 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg569_1, %arg570_1, %arg571_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_146 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_146,), kwargs = {})
%view_712 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_146, [1, 11008]), kwargs = {})
%mm_146 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_712, %t_146), kwargs = {})
%view_713 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_146, [1, 1, 4096]), kwargs = {})
%add_146 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_144, %view_713), kwargs = {})
%mul_357 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_146, %add_146), kwargs = {})
%mean_42 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_357, [-1], True), kwargs = {})
%add_147 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_42, 1e-06), kwargs = {})
%rsqrt_42 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_147,), kwargs = {})
%mul_358 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_146, %rsqrt_42), kwargs = {})
%mul_359 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_358, %arg43_1), kwargs = {})
%choose_qparams_per_token_asymmetric_147 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_359, torch.int8), kwargs = {})
%getitem_378 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_147, 0), kwargs = {})
%getitem_379 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_147, 1), kwargs = {})
%quantize_per_token_147 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_359, %getitem_378, %getitem_379, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_147 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_147, %getitem_378, %getitem_379, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_147 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg572_1, %arg573_1, %arg574_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_147 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_147,), kwargs = {})
%view_714 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_147, [1, 4096]), kwargs = {})
%mm_147 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_714, %t_147), kwargs = {})
%view_715 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_147, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_148 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_359, torch.int8), kwargs = {})
%getitem_380 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_148, 0), kwargs = {})
%getitem_381 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_148, 1), kwargs = {})
%quantize_per_token_148 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_359, %getitem_380, %getitem_381, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_148 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_148, %getitem_380, %getitem_381, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_148 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg575_1, %arg576_1, %arg577_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_148 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_148,), kwargs = {})
%view_716 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_148, [1, 4096]), kwargs = {})
%mm_148 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_716, %t_148), kwargs = {})
%view_717 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_148, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_149 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_359, torch.int8), kwargs = {})
%getitem_382 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_149, 0), kwargs = {})
%getitem_383 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_149, 1), kwargs = {})
%quantize_per_token_149 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_359, %getitem_382, %getitem_383, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_149 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_149, %getitem_382, %getitem_383, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_149 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg578_1, %arg579_1, %arg580_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_149 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_149,), kwargs = {})
%view_718 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_149, [1, 4096]), kwargs = {})
%mm_149 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_718, %t_149), kwargs = {})
%view_719 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_149, [1, 1, 4096]), kwargs = {})
%view_720 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_715, [1, 1, 32, 128]), kwargs = {})
%view_721 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_717, [1, 1, 32, 128]), kwargs = {})
%view_722 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_719, [1, 1, 32, 128]), kwargs = {})
%view_723 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_720, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_42 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_723, -1), kwargs = {})
%getitem_384 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_42, 0), kwargs = {})
%getitem_385 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_42, 1), kwargs = {})
%view_724 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_721, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_43 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_724, -1), kwargs = {})
%getitem_386 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_43, 0), kwargs = {})
%getitem_387 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_43, 1), kwargs = {})
%view_725 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_726 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_360 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_384, %view_725), kwargs = {})
%mul_361 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_385, %view_726), kwargs = {})
%sub_42 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_360, %mul_361), kwargs = {})
%mul_362 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_384, %view_726), kwargs = {})
%mul_363 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_385, %view_725), kwargs = {})
%add_148 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_362, %mul_363), kwargs = {})
%mul_364 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_386, %view_725), kwargs = {})
%mul_365 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_387, %view_726), kwargs = {})
%sub_43 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_364, %mul_365), kwargs = {})
%mul_366 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_386, %view_726), kwargs = {})
%mul_367 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_387, %view_725), kwargs = {})
%add_149 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_366, %mul_367), kwargs = {})
%stack_42 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_42, %add_148], -1), kwargs = {})
%view_727 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_42, [1, 1, 32, 128]), kwargs = {})
%stack_43 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_43, %add_149], -1), kwargs = {})
%view_728 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_43, [1, 1, 32, 128]), kwargs = {})
%transpose_105 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_727, 1, 2), kwargs = {})
%transpose_106 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_728, 1, 2), kwargs = {})
%transpose_107 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_722, 1, 2), kwargs = {})
%slice_211 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg581_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_212 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_211, 1, 0, 9223372036854775807), kwargs = {})
%view_729 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_106, [32, 1, 128]), kwargs = {})
%index_put_42 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_212, [None, None, %arg840_1], %view_729), kwargs = {})
%slice_213 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg581_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_84 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_213, %index_put_42, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_85 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg581_1, %slice_scatter_84, 0, 0, 9223372036854775807), kwargs = {})
%slice_216 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg582_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_217 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_216, 1, 0, 9223372036854775807), kwargs = {})
%view_730 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_107, [32, 1, 128]), kwargs = {})
%index_put_43 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_217, [None, None, %arg840_1], %view_730), kwargs = {})
%slice_218 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg582_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_86 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_218, %index_put_43, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_87 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg582_1, %slice_scatter_86, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_126 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg583_1, 0), kwargs = {})
%unsqueeze_127 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_126, 1), kwargs = {})
%index_23 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_127, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_129 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_85, 2), kwargs = {})
%expand_169 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_129, [1, 32, 1, 128, 128]), kwargs = {})
%clone_42 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_169,), kwargs = {memory_format: torch.contiguous_format})
%view_731 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_42, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_131 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_87, 2), kwargs = {})
%expand_171 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_131, [1, 32, 1, 128, 128]), kwargs = {})
%clone_43 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_171,), kwargs = {memory_format: torch.contiguous_format})
%view_732 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_43, [1, 32, 128, 128]), kwargs = {})
%zeros_like_21 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_23,), kwargs = {dtype: torch.float32})
%logical_not_21 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_23,), kwargs = {})
%masked_fill_21 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_21, %logical_not_21, -inf), kwargs = {})
%mul_368 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_105, 0.29730177875068026), kwargs = {})
%transpose_108 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_731, -2, -1), kwargs = {})
%mul_369 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_108, 0.29730177875068026), kwargs = {})
%expand_172 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_368, [1, 32, 1, 128]), kwargs = {})
%view_733 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_172, [32, 1, 128]), kwargs = {})
%expand_173 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_369, [1, 32, 128, 128]), kwargs = {})
%view_734 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_173, [32, 128, 128]), kwargs = {})
%bmm_42 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_733, %view_734), kwargs = {})
%view_735 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_42, [1, 32, 1, 128]), kwargs = {})
%add_150 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_735, %masked_fill_21), kwargs = {})
%_softmax_21 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_150, -1, False), kwargs = {})
%expand_174 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_21, [1, 32, 1, 128]), kwargs = {})
%view_736 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_174, [32, 1, 128]), kwargs = {})
%expand_175 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_732, [1, 32, 128, 128]), kwargs = {})
%view_737 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_175, [32, 128, 128]), kwargs = {})
%bmm_43 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_736, %view_737), kwargs = {})
%view_738 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_43, [1, 32, 1, 128]), kwargs = {})
%transpose_109 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_738, 1, 2), kwargs = {})
%view_739 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_109, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_150 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_739, torch.int8), kwargs = {})
%getitem_388 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_150, 0), kwargs = {})
%getitem_389 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_150, 1), kwargs = {})
%quantize_per_token_150 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_739, %getitem_388, %getitem_389, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_150 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_150, %getitem_388, %getitem_389, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_150 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg584_1, %arg585_1, %arg586_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_150 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_150,), kwargs = {})
%view_740 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_150, [1, 4096]), kwargs = {})
%mm_150 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_740, %t_150), kwargs = {})
%view_741 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_150, [1, 1, 4096]), kwargs = {})
%add_151 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_146, %view_741), kwargs = {})
%mul_370 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_151, %add_151), kwargs = {})
%mean_43 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_370, [-1], True), kwargs = {})
%add_152 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_43, 1e-06), kwargs = {})
%rsqrt_43 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_152,), kwargs = {})
%mul_371 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_151, %rsqrt_43), kwargs = {})
%mul_372 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_371, %arg44_1), kwargs = {})
%choose_qparams_per_token_asymmetric_151 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_372, torch.int8), kwargs = {})
%getitem_390 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_151, 0), kwargs = {})
%getitem_391 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_151, 1), kwargs = {})
%quantize_per_token_151 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_372, %getitem_390, %getitem_391, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_151 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_151, %getitem_390, %getitem_391, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_151 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg587_1, %arg588_1, %arg589_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_151 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_151,), kwargs = {})
%view_742 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_151, [1, 4096]), kwargs = {})
%mm_151 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_742, %t_151), kwargs = {})
%view_743 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_151, [1, 1, 11008]), kwargs = {})
%silu_21 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_743,), kwargs = {})
%choose_qparams_per_token_asymmetric_152 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_372, torch.int8), kwargs = {})
%getitem_392 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_152, 0), kwargs = {})
%getitem_393 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_152, 1), kwargs = {})
%quantize_per_token_152 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_372, %getitem_392, %getitem_393, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_152 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_152, %getitem_392, %getitem_393, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_152 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg590_1, %arg591_1, %arg592_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_152 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_152,), kwargs = {})
%view_744 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_152, [1, 4096]), kwargs = {})
%mm_152 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_744, %t_152), kwargs = {})
%view_745 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_152, [1, 1, 11008]), kwargs = {})
%mul_373 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_21, %view_745), kwargs = {})
%choose_qparams_per_token_asymmetric_153 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_373, torch.int8), kwargs = {})
%getitem_394 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_153, 0), kwargs = {})
%getitem_395 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_153, 1), kwargs = {})
%quantize_per_token_153 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_373, %getitem_394, %getitem_395, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_153 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_153, %getitem_394, %getitem_395, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_153 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg593_1, %arg594_1, %arg595_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_153 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_153,), kwargs = {})
%view_746 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_153, [1, 11008]), kwargs = {})
%mm_153 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_746, %t_153), kwargs = {})
%view_747 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_153, [1, 1, 4096]), kwargs = {})
%add_153 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_151, %view_747), kwargs = {})
%mul_374 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_153, %add_153), kwargs = {})
%mean_44 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_374, [-1], True), kwargs = {})
%add_154 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_44, 1e-06), kwargs = {})
%rsqrt_44 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_154,), kwargs = {})
%mul_375 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_153, %rsqrt_44), kwargs = {})
%mul_376 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_375, %arg45_1), kwargs = {})
%choose_qparams_per_token_asymmetric_154 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_376, torch.int8), kwargs = {})
%getitem_396 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_154, 0), kwargs = {})
%getitem_397 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_154, 1), kwargs = {})
%quantize_per_token_154 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_376, %getitem_396, %getitem_397, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_154 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_154, %getitem_396, %getitem_397, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_154 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg596_1, %arg597_1, %arg598_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_154 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_154,), kwargs = {})
%view_748 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_154, [1, 4096]), kwargs = {})
%mm_154 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_748, %t_154), kwargs = {})
%view_749 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_154, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_155 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_376, torch.int8), kwargs = {})
%getitem_398 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_155, 0), kwargs = {})
%getitem_399 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_155, 1), kwargs = {})
%quantize_per_token_155 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_376, %getitem_398, %getitem_399, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_155 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_155, %getitem_398, %getitem_399, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_155 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg599_1, %arg600_1, %arg601_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_155 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_155,), kwargs = {})
%view_750 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_155, [1, 4096]), kwargs = {})
%mm_155 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_750, %t_155), kwargs = {})
%view_751 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_155, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_156 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_376, torch.int8), kwargs = {})
%getitem_400 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_156, 0), kwargs = {})
%getitem_401 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_156, 1), kwargs = {})
%quantize_per_token_156 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_376, %getitem_400, %getitem_401, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_156 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_156, %getitem_400, %getitem_401, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_156 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg602_1, %arg603_1, %arg604_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_156 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_156,), kwargs = {})
%view_752 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_156, [1, 4096]), kwargs = {})
%mm_156 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_752, %t_156), kwargs = {})
%view_753 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_156, [1, 1, 4096]), kwargs = {})
%view_754 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_749, [1, 1, 32, 128]), kwargs = {})
%view_755 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_751, [1, 1, 32, 128]), kwargs = {})
%view_756 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_753, [1, 1, 32, 128]), kwargs = {})
%view_757 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_754, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_44 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_757, -1), kwargs = {})
%getitem_402 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_44, 0), kwargs = {})
%getitem_403 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_44, 1), kwargs = {})
%view_758 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_755, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_45 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_758, -1), kwargs = {})
%getitem_404 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_45, 0), kwargs = {})
%getitem_405 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_45, 1), kwargs = {})
%view_759 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_760 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_377 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_402, %view_759), kwargs = {})
%mul_378 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_403, %view_760), kwargs = {})
%sub_44 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_377, %mul_378), kwargs = {})
%mul_379 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_402, %view_760), kwargs = {})
%mul_380 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_403, %view_759), kwargs = {})
%add_155 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_379, %mul_380), kwargs = {})
%mul_381 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_404, %view_759), kwargs = {})
%mul_382 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_405, %view_760), kwargs = {})
%sub_45 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_381, %mul_382), kwargs = {})
%mul_383 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_404, %view_760), kwargs = {})
%mul_384 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_405, %view_759), kwargs = {})
%add_156 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_383, %mul_384), kwargs = {})
%stack_44 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_44, %add_155], -1), kwargs = {})
%view_761 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_44, [1, 1, 32, 128]), kwargs = {})
%stack_45 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_45, %add_156], -1), kwargs = {})
%view_762 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_45, [1, 1, 32, 128]), kwargs = {})
%transpose_110 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_761, 1, 2), kwargs = {})
%transpose_111 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_762, 1, 2), kwargs = {})
%transpose_112 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_756, 1, 2), kwargs = {})
%slice_221 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg605_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_222 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_221, 1, 0, 9223372036854775807), kwargs = {})
%view_763 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_111, [32, 1, 128]), kwargs = {})
%index_put_44 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_222, [None, None, %arg840_1], %view_763), kwargs = {})
%slice_223 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg605_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_88 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_223, %index_put_44, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_89 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg605_1, %slice_scatter_88, 0, 0, 9223372036854775807), kwargs = {})
%slice_226 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg606_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_227 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_226, 1, 0, 9223372036854775807), kwargs = {})
%view_764 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_112, [32, 1, 128]), kwargs = {})
%index_put_45 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_227, [None, None, %arg840_1], %view_764), kwargs = {})
%slice_228 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg606_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_90 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_228, %index_put_45, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_91 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg606_1, %slice_scatter_90, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_132 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg607_1, 0), kwargs = {})
%unsqueeze_133 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_132, 1), kwargs = {})
%index_24 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_133, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_135 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_89, 2), kwargs = {})
%expand_177 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_135, [1, 32, 1, 128, 128]), kwargs = {})
%clone_44 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_177,), kwargs = {memory_format: torch.contiguous_format})
%view_765 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_44, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_137 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_91, 2), kwargs = {})
%expand_179 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_137, [1, 32, 1, 128, 128]), kwargs = {})
%clone_45 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_179,), kwargs = {memory_format: torch.contiguous_format})
%view_766 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_45, [1, 32, 128, 128]), kwargs = {})
%zeros_like_22 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_24,), kwargs = {dtype: torch.float32})
%logical_not_22 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_24,), kwargs = {})
%masked_fill_22 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_22, %logical_not_22, -inf), kwargs = {})
%mul_385 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_110, 0.29730177875068026), kwargs = {})
%transpose_113 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_765, -2, -1), kwargs = {})
%mul_386 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_113, 0.29730177875068026), kwargs = {})
%expand_180 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_385, [1, 32, 1, 128]), kwargs = {})
%view_767 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_180, [32, 1, 128]), kwargs = {})
%expand_181 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_386, [1, 32, 128, 128]), kwargs = {})
%view_768 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_181, [32, 128, 128]), kwargs = {})
%bmm_44 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_767, %view_768), kwargs = {})
%view_769 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_44, [1, 32, 1, 128]), kwargs = {})
%add_157 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_769, %masked_fill_22), kwargs = {})
%_softmax_22 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_157, -1, False), kwargs = {})
%expand_182 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_22, [1, 32, 1, 128]), kwargs = {})
%view_770 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_182, [32, 1, 128]), kwargs = {})
%expand_183 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_766, [1, 32, 128, 128]), kwargs = {})
%view_771 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_183, [32, 128, 128]), kwargs = {})
%bmm_45 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_770, %view_771), kwargs = {})
%view_772 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_45, [1, 32, 1, 128]), kwargs = {})
%transpose_114 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_772, 1, 2), kwargs = {})
%view_773 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_114, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_157 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_773, torch.int8), kwargs = {})
%getitem_406 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_157, 0), kwargs = {})
%getitem_407 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_157, 1), kwargs = {})
%quantize_per_token_157 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_773, %getitem_406, %getitem_407, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_157 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_157, %getitem_406, %getitem_407, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_157 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg608_1, %arg609_1, %arg610_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_157 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_157,), kwargs = {})
%view_774 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_157, [1, 4096]), kwargs = {})
%mm_157 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_774, %t_157), kwargs = {})
%view_775 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_157, [1, 1, 4096]), kwargs = {})
%add_158 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_153, %view_775), kwargs = {})
%mul_387 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_158, %add_158), kwargs = {})
%mean_45 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_387, [-1], True), kwargs = {})
%add_159 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_45, 1e-06), kwargs = {})
%rsqrt_45 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_159,), kwargs = {})
%mul_388 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_158, %rsqrt_45), kwargs = {})
%mul_389 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_388, %arg46_1), kwargs = {})
%choose_qparams_per_token_asymmetric_158 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_389, torch.int8), kwargs = {})
%getitem_408 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_158, 0), kwargs = {})
%getitem_409 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_158, 1), kwargs = {})
%quantize_per_token_158 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_389, %getitem_408, %getitem_409, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_158 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_158, %getitem_408, %getitem_409, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_158 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg611_1, %arg612_1, %arg613_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_158 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_158,), kwargs = {})
%view_776 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_158, [1, 4096]), kwargs = {})
%mm_158 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_776, %t_158), kwargs = {})
%view_777 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_158, [1, 1, 11008]), kwargs = {})
%silu_22 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_777,), kwargs = {})
%choose_qparams_per_token_asymmetric_159 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_389, torch.int8), kwargs = {})
%getitem_410 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_159, 0), kwargs = {})
%getitem_411 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_159, 1), kwargs = {})
%quantize_per_token_159 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_389, %getitem_410, %getitem_411, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_159 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_159, %getitem_410, %getitem_411, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_159 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg614_1, %arg615_1, %arg616_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_159 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_159,), kwargs = {})
%view_778 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_159, [1, 4096]), kwargs = {})
%mm_159 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_778, %t_159), kwargs = {})
%view_779 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_159, [1, 1, 11008]), kwargs = {})
%mul_390 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_22, %view_779), kwargs = {})
%choose_qparams_per_token_asymmetric_160 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_390, torch.int8), kwargs = {})
%getitem_412 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_160, 0), kwargs = {})
%getitem_413 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_160, 1), kwargs = {})
%quantize_per_token_160 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_390, %getitem_412, %getitem_413, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_160 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_160, %getitem_412, %getitem_413, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_160 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg617_1, %arg618_1, %arg619_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_160 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_160,), kwargs = {})
%view_780 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_160, [1, 11008]), kwargs = {})
%mm_160 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_780, %t_160), kwargs = {})
%view_781 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_160, [1, 1, 4096]), kwargs = {})
%add_160 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_158, %view_781), kwargs = {})
%mul_391 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_160, %add_160), kwargs = {})
%mean_46 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_391, [-1], True), kwargs = {})
%add_161 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_46, 1e-06), kwargs = {})
%rsqrt_46 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_161,), kwargs = {})
%mul_392 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_160, %rsqrt_46), kwargs = {})
%mul_393 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_392, %arg47_1), kwargs = {})
%choose_qparams_per_token_asymmetric_161 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_393, torch.int8), kwargs = {})
%getitem_414 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_161, 0), kwargs = {})
%getitem_415 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_161, 1), kwargs = {})
%quantize_per_token_161 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_393, %getitem_414, %getitem_415, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_161 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_161, %getitem_414, %getitem_415, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_161 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg620_1, %arg621_1, %arg622_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_161 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_161,), kwargs = {})
%view_782 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_161, [1, 4096]), kwargs = {})
%mm_161 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_782, %t_161), kwargs = {})
%view_783 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_161, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_162 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_393, torch.int8), kwargs = {})
%getitem_416 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_162, 0), kwargs = {})
%getitem_417 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_162, 1), kwargs = {})
%quantize_per_token_162 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_393, %getitem_416, %getitem_417, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_162 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_162, %getitem_416, %getitem_417, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_162 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg623_1, %arg624_1, %arg625_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_162 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_162,), kwargs = {})
%view_784 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_162, [1, 4096]), kwargs = {})
%mm_162 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_784, %t_162), kwargs = {})
%view_785 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_162, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_163 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_393, torch.int8), kwargs = {})
%getitem_418 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_163, 0), kwargs = {})
%getitem_419 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_163, 1), kwargs = {})
%quantize_per_token_163 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_393, %getitem_418, %getitem_419, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_163 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_163, %getitem_418, %getitem_419, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_163 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg626_1, %arg627_1, %arg628_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_163 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_163,), kwargs = {})
%view_786 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_163, [1, 4096]), kwargs = {})
%mm_163 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_786, %t_163), kwargs = {})
%view_787 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_163, [1, 1, 4096]), kwargs = {})
%view_788 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_783, [1, 1, 32, 128]), kwargs = {})
%view_789 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_785, [1, 1, 32, 128]), kwargs = {})
%view_790 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_787, [1, 1, 32, 128]), kwargs = {})
%view_791 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_788, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_46 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_791, -1), kwargs = {})
%getitem_420 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_46, 0), kwargs = {})
%getitem_421 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_46, 1), kwargs = {})
%view_792 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_789, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_47 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_792, -1), kwargs = {})
%getitem_422 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_47, 0), kwargs = {})
%getitem_423 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_47, 1), kwargs = {})
%view_793 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_794 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_394 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_420, %view_793), kwargs = {})
%mul_395 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_421, %view_794), kwargs = {})
%sub_46 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_394, %mul_395), kwargs = {})
%mul_396 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_420, %view_794), kwargs = {})
%mul_397 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_421, %view_793), kwargs = {})
%add_162 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_396, %mul_397), kwargs = {})
%mul_398 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_422, %view_793), kwargs = {})
%mul_399 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_423, %view_794), kwargs = {})
%sub_47 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_398, %mul_399), kwargs = {})
%mul_400 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_422, %view_794), kwargs = {})
%mul_401 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_423, %view_793), kwargs = {})
%add_163 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_400, %mul_401), kwargs = {})
%stack_46 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_46, %add_162], -1), kwargs = {})
%view_795 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_46, [1, 1, 32, 128]), kwargs = {})
%stack_47 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_47, %add_163], -1), kwargs = {})
%view_796 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_47, [1, 1, 32, 128]), kwargs = {})
%transpose_115 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_795, 1, 2), kwargs = {})
%transpose_116 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_796, 1, 2), kwargs = {})
%transpose_117 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_790, 1, 2), kwargs = {})
%slice_231 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg629_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_232 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_231, 1, 0, 9223372036854775807), kwargs = {})
%view_797 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_116, [32, 1, 128]), kwargs = {})
%index_put_46 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_232, [None, None, %arg840_1], %view_797), kwargs = {})
%slice_233 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg629_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_92 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_233, %index_put_46, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_93 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg629_1, %slice_scatter_92, 0, 0, 9223372036854775807), kwargs = {})
%slice_236 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg630_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_237 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_236, 1, 0, 9223372036854775807), kwargs = {})
%view_798 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_117, [32, 1, 128]), kwargs = {})
%index_put_47 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_237, [None, None, %arg840_1], %view_798), kwargs = {})
%slice_238 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg630_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_94 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_238, %index_put_47, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_95 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg630_1, %slice_scatter_94, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_138 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg631_1, 0), kwargs = {})
%unsqueeze_139 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_138, 1), kwargs = {})
%index_25 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_139, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_141 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_93, 2), kwargs = {})
%expand_185 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_141, [1, 32, 1, 128, 128]), kwargs = {})
%clone_46 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_185,), kwargs = {memory_format: torch.contiguous_format})
%view_799 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_46, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_143 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_95, 2), kwargs = {})
%expand_187 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_143, [1, 32, 1, 128, 128]), kwargs = {})
%clone_47 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_187,), kwargs = {memory_format: torch.contiguous_format})
%view_800 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_47, [1, 32, 128, 128]), kwargs = {})
%zeros_like_23 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_25,), kwargs = {dtype: torch.float32})
%logical_not_23 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_25,), kwargs = {})
%masked_fill_23 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_23, %logical_not_23, -inf), kwargs = {})
%mul_402 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_115, 0.29730177875068026), kwargs = {})
%transpose_118 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_799, -2, -1), kwargs = {})
%mul_403 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_118, 0.29730177875068026), kwargs = {})
%expand_188 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_402, [1, 32, 1, 128]), kwargs = {})
%view_801 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_188, [32, 1, 128]), kwargs = {})
%expand_189 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_403, [1, 32, 128, 128]), kwargs = {})
%view_802 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_189, [32, 128, 128]), kwargs = {})
%bmm_46 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_801, %view_802), kwargs = {})
%view_803 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_46, [1, 32, 1, 128]), kwargs = {})
%add_164 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_803, %masked_fill_23), kwargs = {})
%_softmax_23 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_164, -1, False), kwargs = {})
%expand_190 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_23, [1, 32, 1, 128]), kwargs = {})
%view_804 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_190, [32, 1, 128]), kwargs = {})
%expand_191 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_800, [1, 32, 128, 128]), kwargs = {})
%view_805 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_191, [32, 128, 128]), kwargs = {})
%bmm_47 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_804, %view_805), kwargs = {})
%view_806 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_47, [1, 32, 1, 128]), kwargs = {})
%transpose_119 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_806, 1, 2), kwargs = {})
%view_807 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_119, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_164 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_807, torch.int8), kwargs = {})
%getitem_424 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_164, 0), kwargs = {})
%getitem_425 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_164, 1), kwargs = {})
%quantize_per_token_164 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_807, %getitem_424, %getitem_425, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_164 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_164, %getitem_424, %getitem_425, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_164 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg632_1, %arg633_1, %arg634_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_164 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_164,), kwargs = {})
%view_808 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_164, [1, 4096]), kwargs = {})
%mm_164 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_808, %t_164), kwargs = {})
%view_809 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_164, [1, 1, 4096]), kwargs = {})
%add_165 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_160, %view_809), kwargs = {})
%mul_404 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_165, %add_165), kwargs = {})
%mean_47 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_404, [-1], True), kwargs = {})
%add_166 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_47, 1e-06), kwargs = {})
%rsqrt_47 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_166,), kwargs = {})
%mul_405 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_165, %rsqrt_47), kwargs = {})
%mul_406 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_405, %arg48_1), kwargs = {})
%choose_qparams_per_token_asymmetric_165 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_406, torch.int8), kwargs = {})
%getitem_426 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_165, 0), kwargs = {})
%getitem_427 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_165, 1), kwargs = {})
%quantize_per_token_165 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_406, %getitem_426, %getitem_427, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_165 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_165, %getitem_426, %getitem_427, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_165 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg635_1, %arg636_1, %arg637_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_165 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_165,), kwargs = {})
%view_810 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_165, [1, 4096]), kwargs = {})
%mm_165 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_810, %t_165), kwargs = {})
%view_811 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_165, [1, 1, 11008]), kwargs = {})
%silu_23 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_811,), kwargs = {})
%choose_qparams_per_token_asymmetric_166 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_406, torch.int8), kwargs = {})
%getitem_428 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_166, 0), kwargs = {})
%getitem_429 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_166, 1), kwargs = {})
%quantize_per_token_166 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_406, %getitem_428, %getitem_429, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_166 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_166, %getitem_428, %getitem_429, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_166 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg638_1, %arg639_1, %arg640_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_166 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_166,), kwargs = {})
%view_812 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_166, [1, 4096]), kwargs = {})
%mm_166 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_812, %t_166), kwargs = {})
%view_813 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_166, [1, 1, 11008]), kwargs = {})
%mul_407 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_23, %view_813), kwargs = {})
%choose_qparams_per_token_asymmetric_167 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_407, torch.int8), kwargs = {})
%getitem_430 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_167, 0), kwargs = {})
%getitem_431 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_167, 1), kwargs = {})
%quantize_per_token_167 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_407, %getitem_430, %getitem_431, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_167 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_167, %getitem_430, %getitem_431, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_167 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg641_1, %arg642_1, %arg643_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_167 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_167,), kwargs = {})
%view_814 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_167, [1, 11008]), kwargs = {})
%mm_167 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_814, %t_167), kwargs = {})
%view_815 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_167, [1, 1, 4096]), kwargs = {})
%add_167 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_165, %view_815), kwargs = {})
%mul_408 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_167, %add_167), kwargs = {})
%mean_48 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_408, [-1], True), kwargs = {})
%add_168 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_48, 1e-06), kwargs = {})
%rsqrt_48 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_168,), kwargs = {})
%mul_409 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_167, %rsqrt_48), kwargs = {})
%mul_410 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_409, %arg49_1), kwargs = {})
%choose_qparams_per_token_asymmetric_168 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_410, torch.int8), kwargs = {})
%getitem_432 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_168, 0), kwargs = {})
%getitem_433 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_168, 1), kwargs = {})
%quantize_per_token_168 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_410, %getitem_432, %getitem_433, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_168 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_168, %getitem_432, %getitem_433, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_168 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg644_1, %arg645_1, %arg646_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_168 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_168,), kwargs = {})
%view_816 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_168, [1, 4096]), kwargs = {})
%mm_168 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_816, %t_168), kwargs = {})
%view_817 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_168, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_169 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_410, torch.int8), kwargs = {})
%getitem_434 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_169, 0), kwargs = {})
%getitem_435 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_169, 1), kwargs = {})
%quantize_per_token_169 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_410, %getitem_434, %getitem_435, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_169 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_169, %getitem_434, %getitem_435, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_169 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg647_1, %arg648_1, %arg649_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_169 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_169,), kwargs = {})
%view_818 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_169, [1, 4096]), kwargs = {})
%mm_169 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_818, %t_169), kwargs = {})
%view_819 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_169, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_170 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_410, torch.int8), kwargs = {})
%getitem_436 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_170, 0), kwargs = {})
%getitem_437 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_170, 1), kwargs = {})
%quantize_per_token_170 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_410, %getitem_436, %getitem_437, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_170 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_170, %getitem_436, %getitem_437, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_170 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg650_1, %arg651_1, %arg652_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_170 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_170,), kwargs = {})
%view_820 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_170, [1, 4096]), kwargs = {})
%mm_170 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_820, %t_170), kwargs = {})
%view_821 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_170, [1, 1, 4096]), kwargs = {})
%view_822 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_817, [1, 1, 32, 128]), kwargs = {})
%view_823 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_819, [1, 1, 32, 128]), kwargs = {})
%view_824 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_821, [1, 1, 32, 128]), kwargs = {})
%view_825 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_822, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_48 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_825, -1), kwargs = {})
%getitem_438 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_48, 0), kwargs = {})
%getitem_439 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_48, 1), kwargs = {})
%view_826 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_823, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_49 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_826, -1), kwargs = {})
%getitem_440 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_49, 0), kwargs = {})
%getitem_441 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_49, 1), kwargs = {})
%view_827 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_828 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_411 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_438, %view_827), kwargs = {})
%mul_412 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_439, %view_828), kwargs = {})
%sub_48 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_411, %mul_412), kwargs = {})
%mul_413 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_438, %view_828), kwargs = {})
%mul_414 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_439, %view_827), kwargs = {})
%add_169 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_413, %mul_414), kwargs = {})
%mul_415 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_440, %view_827), kwargs = {})
%mul_416 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_441, %view_828), kwargs = {})
%sub_49 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_415, %mul_416), kwargs = {})
%mul_417 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_440, %view_828), kwargs = {})
%mul_418 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_441, %view_827), kwargs = {})
%add_170 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_417, %mul_418), kwargs = {})
%stack_48 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_48, %add_169], -1), kwargs = {})
%view_829 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_48, [1, 1, 32, 128]), kwargs = {})
%stack_49 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_49, %add_170], -1), kwargs = {})
%view_830 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_49, [1, 1, 32, 128]), kwargs = {})
%transpose_120 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_829, 1, 2), kwargs = {})
%transpose_121 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_830, 1, 2), kwargs = {})
%transpose_122 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_824, 1, 2), kwargs = {})
%slice_241 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg653_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_242 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_241, 1, 0, 9223372036854775807), kwargs = {})
%view_831 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_121, [32, 1, 128]), kwargs = {})
%index_put_48 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_242, [None, None, %arg840_1], %view_831), kwargs = {})
%slice_243 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg653_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_96 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_243, %index_put_48, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_97 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg653_1, %slice_scatter_96, 0, 0, 9223372036854775807), kwargs = {})
%slice_246 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg654_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_247 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_246, 1, 0, 9223372036854775807), kwargs = {})
%view_832 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_122, [32, 1, 128]), kwargs = {})
%index_put_49 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_247, [None, None, %arg840_1], %view_832), kwargs = {})
%slice_248 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg654_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_98 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_248, %index_put_49, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_99 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg654_1, %slice_scatter_98, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_144 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg655_1, 0), kwargs = {})
%unsqueeze_145 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_144, 1), kwargs = {})
%index_26 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_145, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_147 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_97, 2), kwargs = {})
%expand_193 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_147, [1, 32, 1, 128, 128]), kwargs = {})
%clone_48 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_193,), kwargs = {memory_format: torch.contiguous_format})
%view_833 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_48, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_149 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_99, 2), kwargs = {})
%expand_195 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_149, [1, 32, 1, 128, 128]), kwargs = {})
%clone_49 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_195,), kwargs = {memory_format: torch.contiguous_format})
%view_834 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_49, [1, 32, 128, 128]), kwargs = {})
%zeros_like_24 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_26,), kwargs = {dtype: torch.float32})
%logical_not_24 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_26,), kwargs = {})
%masked_fill_24 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_24, %logical_not_24, -inf), kwargs = {})
%mul_419 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_120, 0.29730177875068026), kwargs = {})
%transpose_123 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_833, -2, -1), kwargs = {})
%mul_420 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_123, 0.29730177875068026), kwargs = {})
%expand_196 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_419, [1, 32, 1, 128]), kwargs = {})
%view_835 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_196, [32, 1, 128]), kwargs = {})
%expand_197 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_420, [1, 32, 128, 128]), kwargs = {})
%view_836 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_197, [32, 128, 128]), kwargs = {})
%bmm_48 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_835, %view_836), kwargs = {})
%view_837 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_48, [1, 32, 1, 128]), kwargs = {})
%add_171 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_837, %masked_fill_24), kwargs = {})
%_softmax_24 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_171, -1, False), kwargs = {})
%expand_198 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_24, [1, 32, 1, 128]), kwargs = {})
%view_838 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_198, [32, 1, 128]), kwargs = {})
%expand_199 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_834, [1, 32, 128, 128]), kwargs = {})
%view_839 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_199, [32, 128, 128]), kwargs = {})
%bmm_49 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_838, %view_839), kwargs = {})
%view_840 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_49, [1, 32, 1, 128]), kwargs = {})
%transpose_124 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_840, 1, 2), kwargs = {})
%view_841 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_124, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_171 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_841, torch.int8), kwargs = {})
%getitem_442 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_171, 0), kwargs = {})
%getitem_443 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_171, 1), kwargs = {})
%quantize_per_token_171 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_841, %getitem_442, %getitem_443, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_171 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_171, %getitem_442, %getitem_443, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_171 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg656_1, %arg657_1, %arg658_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_171 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_171,), kwargs = {})
%view_842 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_171, [1, 4096]), kwargs = {})
%mm_171 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_842, %t_171), kwargs = {})
%view_843 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_171, [1, 1, 4096]), kwargs = {})
%add_172 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_167, %view_843), kwargs = {})
%mul_421 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_172, %add_172), kwargs = {})
%mean_49 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_421, [-1], True), kwargs = {})
%add_173 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_49, 1e-06), kwargs = {})
%rsqrt_49 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_173,), kwargs = {})
%mul_422 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_172, %rsqrt_49), kwargs = {})
%mul_423 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_422, %arg50_1), kwargs = {})
%choose_qparams_per_token_asymmetric_172 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_423, torch.int8), kwargs = {})
%getitem_444 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_172, 0), kwargs = {})
%getitem_445 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_172, 1), kwargs = {})
%quantize_per_token_172 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_423, %getitem_444, %getitem_445, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_172 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_172, %getitem_444, %getitem_445, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_172 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg659_1, %arg660_1, %arg661_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_172 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_172,), kwargs = {})
%view_844 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_172, [1, 4096]), kwargs = {})
%mm_172 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_844, %t_172), kwargs = {})
%view_845 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_172, [1, 1, 11008]), kwargs = {})
%silu_24 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_845,), kwargs = {})
%choose_qparams_per_token_asymmetric_173 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_423, torch.int8), kwargs = {})
%getitem_446 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_173, 0), kwargs = {})
%getitem_447 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_173, 1), kwargs = {})
%quantize_per_token_173 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_423, %getitem_446, %getitem_447, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_173 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_173, %getitem_446, %getitem_447, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_173 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg662_1, %arg663_1, %arg664_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_173 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_173,), kwargs = {})
%view_846 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_173, [1, 4096]), kwargs = {})
%mm_173 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_846, %t_173), kwargs = {})
%view_847 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_173, [1, 1, 11008]), kwargs = {})
%mul_424 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_24, %view_847), kwargs = {})
%choose_qparams_per_token_asymmetric_174 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_424, torch.int8), kwargs = {})
%getitem_448 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_174, 0), kwargs = {})
%getitem_449 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_174, 1), kwargs = {})
%quantize_per_token_174 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_424, %getitem_448, %getitem_449, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_174 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_174, %getitem_448, %getitem_449, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_174 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg665_1, %arg666_1, %arg667_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_174 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_174,), kwargs = {})
%view_848 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_174, [1, 11008]), kwargs = {})
%mm_174 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_848, %t_174), kwargs = {})
%view_849 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_174, [1, 1, 4096]), kwargs = {})
%add_174 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_172, %view_849), kwargs = {})
%mul_425 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_174, %add_174), kwargs = {})
%mean_50 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_425, [-1], True), kwargs = {})
%add_175 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_50, 1e-06), kwargs = {})
%rsqrt_50 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_175,), kwargs = {})
%mul_426 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_174, %rsqrt_50), kwargs = {})
%mul_427 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_426, %arg51_1), kwargs = {})
%choose_qparams_per_token_asymmetric_175 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_427, torch.int8), kwargs = {})
%getitem_450 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_175, 0), kwargs = {})
%getitem_451 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_175, 1), kwargs = {})
%quantize_per_token_175 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_427, %getitem_450, %getitem_451, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_175 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_175, %getitem_450, %getitem_451, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_175 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg668_1, %arg669_1, %arg670_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_175 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_175,), kwargs = {})
%view_850 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_175, [1, 4096]), kwargs = {})
%mm_175 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_850, %t_175), kwargs = {})
%view_851 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_175, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_176 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_427, torch.int8), kwargs = {})
%getitem_452 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_176, 0), kwargs = {})
%getitem_453 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_176, 1), kwargs = {})
%quantize_per_token_176 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_427, %getitem_452, %getitem_453, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_176 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_176, %getitem_452, %getitem_453, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_176 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg671_1, %arg672_1, %arg673_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_176 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_176,), kwargs = {})
%view_852 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_176, [1, 4096]), kwargs = {})
%mm_176 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_852, %t_176), kwargs = {})
%view_853 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_176, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_177 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_427, torch.int8), kwargs = {})
%getitem_454 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_177, 0), kwargs = {})
%getitem_455 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_177, 1), kwargs = {})
%quantize_per_token_177 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_427, %getitem_454, %getitem_455, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_177 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_177, %getitem_454, %getitem_455, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_177 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg674_1, %arg675_1, %arg676_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_177 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_177,), kwargs = {})
%view_854 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_177, [1, 4096]), kwargs = {})
%mm_177 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_854, %t_177), kwargs = {})
%view_855 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_177, [1, 1, 4096]), kwargs = {})
%view_856 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_851, [1, 1, 32, 128]), kwargs = {})
%view_857 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_853, [1, 1, 32, 128]), kwargs = {})
%view_858 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_855, [1, 1, 32, 128]), kwargs = {})
%view_859 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_856, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_50 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_859, -1), kwargs = {})
%getitem_456 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_50, 0), kwargs = {})
%getitem_457 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_50, 1), kwargs = {})
%view_860 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_857, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_51 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_860, -1), kwargs = {})
%getitem_458 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_51, 0), kwargs = {})
%getitem_459 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_51, 1), kwargs = {})
%view_861 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_862 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_428 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_456, %view_861), kwargs = {})
%mul_429 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_457, %view_862), kwargs = {})
%sub_50 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_428, %mul_429), kwargs = {})
%mul_430 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_456, %view_862), kwargs = {})
%mul_431 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_457, %view_861), kwargs = {})
%add_176 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_430, %mul_431), kwargs = {})
%mul_432 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_458, %view_861), kwargs = {})
%mul_433 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_459, %view_862), kwargs = {})
%sub_51 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_432, %mul_433), kwargs = {})
%mul_434 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_458, %view_862), kwargs = {})
%mul_435 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_459, %view_861), kwargs = {})
%add_177 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_434, %mul_435), kwargs = {})
%stack_50 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_50, %add_176], -1), kwargs = {})
%view_863 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_50, [1, 1, 32, 128]), kwargs = {})
%stack_51 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_51, %add_177], -1), kwargs = {})
%view_864 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_51, [1, 1, 32, 128]), kwargs = {})
%transpose_125 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_863, 1, 2), kwargs = {})
%transpose_126 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_864, 1, 2), kwargs = {})
%transpose_127 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_858, 1, 2), kwargs = {})
%slice_251 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg677_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_252 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_251, 1, 0, 9223372036854775807), kwargs = {})
%view_865 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_126, [32, 1, 128]), kwargs = {})
%index_put_50 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_252, [None, None, %arg840_1], %view_865), kwargs = {})
%slice_253 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg677_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_100 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_253, %index_put_50, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_101 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg677_1, %slice_scatter_100, 0, 0, 9223372036854775807), kwargs = {})
%slice_256 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg678_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_257 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_256, 1, 0, 9223372036854775807), kwargs = {})
%view_866 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_127, [32, 1, 128]), kwargs = {})
%index_put_51 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_257, [None, None, %arg840_1], %view_866), kwargs = {})
%slice_258 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg678_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_102 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_258, %index_put_51, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_103 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg678_1, %slice_scatter_102, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_150 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg679_1, 0), kwargs = {})
%unsqueeze_151 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_150, 1), kwargs = {})
%index_27 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_151, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_153 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_101, 2), kwargs = {})
%expand_201 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_153, [1, 32, 1, 128, 128]), kwargs = {})
%clone_50 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_201,), kwargs = {memory_format: torch.contiguous_format})
%view_867 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_50, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_155 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_103, 2), kwargs = {})
%expand_203 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_155, [1, 32, 1, 128, 128]), kwargs = {})
%clone_51 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_203,), kwargs = {memory_format: torch.contiguous_format})
%view_868 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_51, [1, 32, 128, 128]), kwargs = {})
%zeros_like_25 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_27,), kwargs = {dtype: torch.float32})
%logical_not_25 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_27,), kwargs = {})
%masked_fill_25 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_25, %logical_not_25, -inf), kwargs = {})
%mul_436 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_125, 0.29730177875068026), kwargs = {})
%transpose_128 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_867, -2, -1), kwargs = {})
%mul_437 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_128, 0.29730177875068026), kwargs = {})
%expand_204 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_436, [1, 32, 1, 128]), kwargs = {})
%view_869 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_204, [32, 1, 128]), kwargs = {})
%expand_205 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_437, [1, 32, 128, 128]), kwargs = {})
%view_870 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_205, [32, 128, 128]), kwargs = {})
%bmm_50 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_869, %view_870), kwargs = {})
%view_871 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_50, [1, 32, 1, 128]), kwargs = {})
%add_178 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_871, %masked_fill_25), kwargs = {})
%_softmax_25 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_178, -1, False), kwargs = {})
%expand_206 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_25, [1, 32, 1, 128]), kwargs = {})
%view_872 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_206, [32, 1, 128]), kwargs = {})
%expand_207 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_868, [1, 32, 128, 128]), kwargs = {})
%view_873 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_207, [32, 128, 128]), kwargs = {})
%bmm_51 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_872, %view_873), kwargs = {})
%view_874 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_51, [1, 32, 1, 128]), kwargs = {})
%transpose_129 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_874, 1, 2), kwargs = {})
%view_875 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_129, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_178 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_875, torch.int8), kwargs = {})
%getitem_460 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_178, 0), kwargs = {})
%getitem_461 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_178, 1), kwargs = {})
%quantize_per_token_178 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_875, %getitem_460, %getitem_461, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_178 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_178, %getitem_460, %getitem_461, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_178 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg680_1, %arg681_1, %arg682_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_178 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_178,), kwargs = {})
%view_876 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_178, [1, 4096]), kwargs = {})
%mm_178 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_876, %t_178), kwargs = {})
%view_877 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_178, [1, 1, 4096]), kwargs = {})
%add_179 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_174, %view_877), kwargs = {})
%mul_438 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_179, %add_179), kwargs = {})
%mean_51 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_438, [-1], True), kwargs = {})
%add_180 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_51, 1e-06), kwargs = {})
%rsqrt_51 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_180,), kwargs = {})
%mul_439 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_179, %rsqrt_51), kwargs = {})
%mul_440 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_439, %arg52_1), kwargs = {})
%choose_qparams_per_token_asymmetric_179 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_440, torch.int8), kwargs = {})
%getitem_462 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_179, 0), kwargs = {})
%getitem_463 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_179, 1), kwargs = {})
%quantize_per_token_179 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_440, %getitem_462, %getitem_463, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_179 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_179, %getitem_462, %getitem_463, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_179 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg683_1, %arg684_1, %arg685_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_179 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_179,), kwargs = {})
%view_878 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_179, [1, 4096]), kwargs = {})
%mm_179 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_878, %t_179), kwargs = {})
%view_879 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_179, [1, 1, 11008]), kwargs = {})
%silu_25 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_879,), kwargs = {})
%choose_qparams_per_token_asymmetric_180 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_440, torch.int8), kwargs = {})
%getitem_464 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_180, 0), kwargs = {})
%getitem_465 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_180, 1), kwargs = {})
%quantize_per_token_180 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_440, %getitem_464, %getitem_465, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_180 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_180, %getitem_464, %getitem_465, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_180 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg686_1, %arg687_1, %arg688_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_180 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_180,), kwargs = {})
%view_880 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_180, [1, 4096]), kwargs = {})
%mm_180 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_880, %t_180), kwargs = {})
%view_881 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_180, [1, 1, 11008]), kwargs = {})
%mul_441 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_25, %view_881), kwargs = {})
%choose_qparams_per_token_asymmetric_181 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_441, torch.int8), kwargs = {})
%getitem_466 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_181, 0), kwargs = {})
%getitem_467 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_181, 1), kwargs = {})
%quantize_per_token_181 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_441, %getitem_466, %getitem_467, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_181 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_181, %getitem_466, %getitem_467, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_181 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg689_1, %arg690_1, %arg691_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_181 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_181,), kwargs = {})
%view_882 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_181, [1, 11008]), kwargs = {})
%mm_181 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_882, %t_181), kwargs = {})
%view_883 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_181, [1, 1, 4096]), kwargs = {})
%add_181 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_179, %view_883), kwargs = {})
%mul_442 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_181, %add_181), kwargs = {})
%mean_52 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_442, [-1], True), kwargs = {})
%add_182 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_52, 1e-06), kwargs = {})
%rsqrt_52 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_182,), kwargs = {})
%mul_443 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_181, %rsqrt_52), kwargs = {})
%mul_444 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_443, %arg53_1), kwargs = {})
%choose_qparams_per_token_asymmetric_182 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_444, torch.int8), kwargs = {})
%getitem_468 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_182, 0), kwargs = {})
%getitem_469 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_182, 1), kwargs = {})
%quantize_per_token_182 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_444, %getitem_468, %getitem_469, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_182 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_182, %getitem_468, %getitem_469, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_182 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg692_1, %arg693_1, %arg694_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_182 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_182,), kwargs = {})
%view_884 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_182, [1, 4096]), kwargs = {})
%mm_182 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_884, %t_182), kwargs = {})
%view_885 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_182, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_183 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_444, torch.int8), kwargs = {})
%getitem_470 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_183, 0), kwargs = {})
%getitem_471 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_183, 1), kwargs = {})
%quantize_per_token_183 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_444, %getitem_470, %getitem_471, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_183 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_183, %getitem_470, %getitem_471, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_183 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg695_1, %arg696_1, %arg697_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_183 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_183,), kwargs = {})
%view_886 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_183, [1, 4096]), kwargs = {})
%mm_183 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_886, %t_183), kwargs = {})
%view_887 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_183, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_184 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_444, torch.int8), kwargs = {})
%getitem_472 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_184, 0), kwargs = {})
%getitem_473 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_184, 1), kwargs = {})
%quantize_per_token_184 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_444, %getitem_472, %getitem_473, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_184 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_184, %getitem_472, %getitem_473, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_184 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg698_1, %arg699_1, %arg700_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_184 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_184,), kwargs = {})
%view_888 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_184, [1, 4096]), kwargs = {})
%mm_184 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_888, %t_184), kwargs = {})
%view_889 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_184, [1, 1, 4096]), kwargs = {})
%view_890 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_885, [1, 1, 32, 128]), kwargs = {})
%view_891 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_887, [1, 1, 32, 128]), kwargs = {})
%view_892 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_889, [1, 1, 32, 128]), kwargs = {})
%view_893 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_890, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_52 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_893, -1), kwargs = {})
%getitem_474 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_52, 0), kwargs = {})
%getitem_475 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_52, 1), kwargs = {})
%view_894 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_891, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_53 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_894, -1), kwargs = {})
%getitem_476 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_53, 0), kwargs = {})
%getitem_477 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_53, 1), kwargs = {})
%view_895 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_896 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_445 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_474, %view_895), kwargs = {})
%mul_446 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_475, %view_896), kwargs = {})
%sub_52 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_445, %mul_446), kwargs = {})
%mul_447 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_474, %view_896), kwargs = {})
%mul_448 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_475, %view_895), kwargs = {})
%add_183 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_447, %mul_448), kwargs = {})
%mul_449 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_476, %view_895), kwargs = {})
%mul_450 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_477, %view_896), kwargs = {})
%sub_53 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_449, %mul_450), kwargs = {})
%mul_451 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_476, %view_896), kwargs = {})
%mul_452 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_477, %view_895), kwargs = {})
%add_184 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_451, %mul_452), kwargs = {})
%stack_52 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_52, %add_183], -1), kwargs = {})
%view_897 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_52, [1, 1, 32, 128]), kwargs = {})
%stack_53 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_53, %add_184], -1), kwargs = {})
%view_898 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_53, [1, 1, 32, 128]), kwargs = {})
%transpose_130 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_897, 1, 2), kwargs = {})
%transpose_131 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_898, 1, 2), kwargs = {})
%transpose_132 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_892, 1, 2), kwargs = {})
%slice_261 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg701_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_262 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_261, 1, 0, 9223372036854775807), kwargs = {})
%view_899 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_131, [32, 1, 128]), kwargs = {})
%index_put_52 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_262, [None, None, %arg840_1], %view_899), kwargs = {})
%slice_263 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg701_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_104 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_263, %index_put_52, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_105 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg701_1, %slice_scatter_104, 0, 0, 9223372036854775807), kwargs = {})
%slice_266 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg702_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_267 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_266, 1, 0, 9223372036854775807), kwargs = {})
%view_900 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_132, [32, 1, 128]), kwargs = {})
%index_put_53 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_267, [None, None, %arg840_1], %view_900), kwargs = {})
%slice_268 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg702_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_106 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_268, %index_put_53, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_107 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg702_1, %slice_scatter_106, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_156 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg703_1, 0), kwargs = {})
%unsqueeze_157 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_156, 1), kwargs = {})
%index_28 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_157, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_159 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_105, 2), kwargs = {})
%expand_209 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_159, [1, 32, 1, 128, 128]), kwargs = {})
%clone_52 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_209,), kwargs = {memory_format: torch.contiguous_format})
%view_901 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_52, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_161 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_107, 2), kwargs = {})
%expand_211 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_161, [1, 32, 1, 128, 128]), kwargs = {})
%clone_53 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_211,), kwargs = {memory_format: torch.contiguous_format})
%view_902 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_53, [1, 32, 128, 128]), kwargs = {})
%zeros_like_26 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_28,), kwargs = {dtype: torch.float32})
%logical_not_26 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_28,), kwargs = {})
%masked_fill_26 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_26, %logical_not_26, -inf), kwargs = {})
%mul_453 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_130, 0.29730177875068026), kwargs = {})
%transpose_133 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_901, -2, -1), kwargs = {})
%mul_454 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_133, 0.29730177875068026), kwargs = {})
%expand_212 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_453, [1, 32, 1, 128]), kwargs = {})
%view_903 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_212, [32, 1, 128]), kwargs = {})
%expand_213 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_454, [1, 32, 128, 128]), kwargs = {})
%view_904 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_213, [32, 128, 128]), kwargs = {})
%bmm_52 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_903, %view_904), kwargs = {})
%view_905 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_52, [1, 32, 1, 128]), kwargs = {})
%add_185 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_905, %masked_fill_26), kwargs = {})
%_softmax_26 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_185, -1, False), kwargs = {})
%expand_214 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_26, [1, 32, 1, 128]), kwargs = {})
%view_906 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_214, [32, 1, 128]), kwargs = {})
%expand_215 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_902, [1, 32, 128, 128]), kwargs = {})
%view_907 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_215, [32, 128, 128]), kwargs = {})
%bmm_53 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_906, %view_907), kwargs = {})
%view_908 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_53, [1, 32, 1, 128]), kwargs = {})
%transpose_134 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_908, 1, 2), kwargs = {})
%view_909 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_134, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_185 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_909, torch.int8), kwargs = {})
%getitem_478 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_185, 0), kwargs = {})
%getitem_479 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_185, 1), kwargs = {})
%quantize_per_token_185 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_909, %getitem_478, %getitem_479, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_185 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_185, %getitem_478, %getitem_479, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_185 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg704_1, %arg705_1, %arg706_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_185 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_185,), kwargs = {})
%view_910 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_185, [1, 4096]), kwargs = {})
%mm_185 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_910, %t_185), kwargs = {})
%view_911 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_185, [1, 1, 4096]), kwargs = {})
%add_186 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_181, %view_911), kwargs = {})
%mul_455 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_186, %add_186), kwargs = {})
%mean_53 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_455, [-1], True), kwargs = {})
%add_187 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_53, 1e-06), kwargs = {})
%rsqrt_53 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_187,), kwargs = {})
%mul_456 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_186, %rsqrt_53), kwargs = {})
%mul_457 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_456, %arg54_1), kwargs = {})
%choose_qparams_per_token_asymmetric_186 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_457, torch.int8), kwargs = {})
%getitem_480 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_186, 0), kwargs = {})
%getitem_481 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_186, 1), kwargs = {})
%quantize_per_token_186 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_457, %getitem_480, %getitem_481, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_186 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_186, %getitem_480, %getitem_481, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_186 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg707_1, %arg708_1, %arg709_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_186 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_186,), kwargs = {})
%view_912 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_186, [1, 4096]), kwargs = {})
%mm_186 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_912, %t_186), kwargs = {})
%view_913 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_186, [1, 1, 11008]), kwargs = {})
%silu_26 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_913,), kwargs = {})
%choose_qparams_per_token_asymmetric_187 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_457, torch.int8), kwargs = {})
%getitem_482 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_187, 0), kwargs = {})
%getitem_483 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_187, 1), kwargs = {})
%quantize_per_token_187 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_457, %getitem_482, %getitem_483, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_187 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_187, %getitem_482, %getitem_483, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_187 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg710_1, %arg711_1, %arg712_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_187 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_187,), kwargs = {})
%view_914 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_187, [1, 4096]), kwargs = {})
%mm_187 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_914, %t_187), kwargs = {})
%view_915 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_187, [1, 1, 11008]), kwargs = {})
%mul_458 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_26, %view_915), kwargs = {})
%choose_qparams_per_token_asymmetric_188 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_458, torch.int8), kwargs = {})
%getitem_484 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_188, 0), kwargs = {})
%getitem_485 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_188, 1), kwargs = {})
%quantize_per_token_188 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_458, %getitem_484, %getitem_485, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_188 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_188, %getitem_484, %getitem_485, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_188 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg713_1, %arg714_1, %arg715_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_188 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_188,), kwargs = {})
%view_916 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_188, [1, 11008]), kwargs = {})
%mm_188 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_916, %t_188), kwargs = {})
%view_917 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_188, [1, 1, 4096]), kwargs = {})
%add_188 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_186, %view_917), kwargs = {})
%mul_459 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_188, %add_188), kwargs = {})
%mean_54 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_459, [-1], True), kwargs = {})
%add_189 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_54, 1e-06), kwargs = {})
%rsqrt_54 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_189,), kwargs = {})
%mul_460 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_188, %rsqrt_54), kwargs = {})
%mul_461 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_460, %arg55_1), kwargs = {})
%choose_qparams_per_token_asymmetric_189 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_461, torch.int8), kwargs = {})
%getitem_486 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_189, 0), kwargs = {})
%getitem_487 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_189, 1), kwargs = {})
%quantize_per_token_189 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_461, %getitem_486, %getitem_487, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_189 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_189, %getitem_486, %getitem_487, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_189 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg716_1, %arg717_1, %arg718_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_189 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_189,), kwargs = {})
%view_918 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_189, [1, 4096]), kwargs = {})
%mm_189 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_918, %t_189), kwargs = {})
%view_919 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_189, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_190 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_461, torch.int8), kwargs = {})
%getitem_488 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_190, 0), kwargs = {})
%getitem_489 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_190, 1), kwargs = {})
%quantize_per_token_190 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_461, %getitem_488, %getitem_489, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_190 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_190, %getitem_488, %getitem_489, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_190 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg719_1, %arg720_1, %arg721_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_190 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_190,), kwargs = {})
%view_920 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_190, [1, 4096]), kwargs = {})
%mm_190 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_920, %t_190), kwargs = {})
%view_921 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_190, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_191 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_461, torch.int8), kwargs = {})
%getitem_490 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_191, 0), kwargs = {})
%getitem_491 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_191, 1), kwargs = {})
%quantize_per_token_191 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_461, %getitem_490, %getitem_491, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_191 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_191, %getitem_490, %getitem_491, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_191 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg722_1, %arg723_1, %arg724_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_191 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_191,), kwargs = {})
%view_922 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_191, [1, 4096]), kwargs = {})
%mm_191 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_922, %t_191), kwargs = {})
%view_923 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_191, [1, 1, 4096]), kwargs = {})
%view_924 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_919, [1, 1, 32, 128]), kwargs = {})
%view_925 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_921, [1, 1, 32, 128]), kwargs = {})
%view_926 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_923, [1, 1, 32, 128]), kwargs = {})
%view_927 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_924, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_54 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_927, -1), kwargs = {})
%getitem_492 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_54, 0), kwargs = {})
%getitem_493 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_54, 1), kwargs = {})
%view_928 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_925, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_55 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_928, -1), kwargs = {})
%getitem_494 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_55, 0), kwargs = {})
%getitem_495 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_55, 1), kwargs = {})
%view_929 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_930 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_462 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_492, %view_929), kwargs = {})
%mul_463 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_493, %view_930), kwargs = {})
%sub_54 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_462, %mul_463), kwargs = {})
%mul_464 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_492, %view_930), kwargs = {})
%mul_465 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_493, %view_929), kwargs = {})
%add_190 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_464, %mul_465), kwargs = {})
%mul_466 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_494, %view_929), kwargs = {})
%mul_467 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_495, %view_930), kwargs = {})
%sub_55 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_466, %mul_467), kwargs = {})
%mul_468 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_494, %view_930), kwargs = {})
%mul_469 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_495, %view_929), kwargs = {})
%add_191 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_468, %mul_469), kwargs = {})
%stack_54 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_54, %add_190], -1), kwargs = {})
%view_931 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_54, [1, 1, 32, 128]), kwargs = {})
%stack_55 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_55, %add_191], -1), kwargs = {})
%view_932 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_55, [1, 1, 32, 128]), kwargs = {})
%transpose_135 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_931, 1, 2), kwargs = {})
%transpose_136 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_932, 1, 2), kwargs = {})
%transpose_137 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_926, 1, 2), kwargs = {})
%slice_271 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg725_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_272 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_271, 1, 0, 9223372036854775807), kwargs = {})
%view_933 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_136, [32, 1, 128]), kwargs = {})
%index_put_54 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_272, [None, None, %arg840_1], %view_933), kwargs = {})
%slice_273 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg725_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_108 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_273, %index_put_54, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_109 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg725_1, %slice_scatter_108, 0, 0, 9223372036854775807), kwargs = {})
%slice_276 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg726_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_277 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_276, 1, 0, 9223372036854775807), kwargs = {})
%view_934 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_137, [32, 1, 128]), kwargs = {})
%index_put_55 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_277, [None, None, %arg840_1], %view_934), kwargs = {})
%slice_278 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg726_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_110 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_278, %index_put_55, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_111 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg726_1, %slice_scatter_110, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_162 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg727_1, 0), kwargs = {})
%unsqueeze_163 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_162, 1), kwargs = {})
%index_29 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_163, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_165 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_109, 2), kwargs = {})
%expand_217 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_165, [1, 32, 1, 128, 128]), kwargs = {})
%clone_54 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_217,), kwargs = {memory_format: torch.contiguous_format})
%view_935 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_54, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_167 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_111, 2), kwargs = {})
%expand_219 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_167, [1, 32, 1, 128, 128]), kwargs = {})
%clone_55 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_219,), kwargs = {memory_format: torch.contiguous_format})
%view_936 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_55, [1, 32, 128, 128]), kwargs = {})
%zeros_like_27 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_29,), kwargs = {dtype: torch.float32})
%logical_not_27 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_29,), kwargs = {})
%masked_fill_27 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_27, %logical_not_27, -inf), kwargs = {})
%mul_470 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_135, 0.29730177875068026), kwargs = {})
%transpose_138 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_935, -2, -1), kwargs = {})
%mul_471 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_138, 0.29730177875068026), kwargs = {})
%expand_220 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_470, [1, 32, 1, 128]), kwargs = {})
%view_937 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_220, [32, 1, 128]), kwargs = {})
%expand_221 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_471, [1, 32, 128, 128]), kwargs = {})
%view_938 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_221, [32, 128, 128]), kwargs = {})
%bmm_54 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_937, %view_938), kwargs = {})
%view_939 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_54, [1, 32, 1, 128]), kwargs = {})
%add_192 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_939, %masked_fill_27), kwargs = {})
%_softmax_27 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_192, -1, False), kwargs = {})
%expand_222 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_27, [1, 32, 1, 128]), kwargs = {})
%view_940 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_222, [32, 1, 128]), kwargs = {})
%expand_223 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_936, [1, 32, 128, 128]), kwargs = {})
%view_941 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_223, [32, 128, 128]), kwargs = {})
%bmm_55 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_940, %view_941), kwargs = {})
%view_942 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_55, [1, 32, 1, 128]), kwargs = {})
%transpose_139 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_942, 1, 2), kwargs = {})
%view_943 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_139, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_192 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_943, torch.int8), kwargs = {})
%getitem_496 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_192, 0), kwargs = {})
%getitem_497 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_192, 1), kwargs = {})
%quantize_per_token_192 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_943, %getitem_496, %getitem_497, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_192 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_192, %getitem_496, %getitem_497, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_192 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg728_1, %arg729_1, %arg730_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_192 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_192,), kwargs = {})
%view_944 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_192, [1, 4096]), kwargs = {})
%mm_192 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_944, %t_192), kwargs = {})
%view_945 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_192, [1, 1, 4096]), kwargs = {})
%add_193 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_188, %view_945), kwargs = {})
%mul_472 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_193, %add_193), kwargs = {})
%mean_55 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_472, [-1], True), kwargs = {})
%add_194 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_55, 1e-06), kwargs = {})
%rsqrt_55 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_194,), kwargs = {})
%mul_473 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_193, %rsqrt_55), kwargs = {})
%mul_474 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_473, %arg56_1), kwargs = {})
%choose_qparams_per_token_asymmetric_193 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_474, torch.int8), kwargs = {})
%getitem_498 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_193, 0), kwargs = {})
%getitem_499 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_193, 1), kwargs = {})
%quantize_per_token_193 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_474, %getitem_498, %getitem_499, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_193 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_193, %getitem_498, %getitem_499, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_193 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg731_1, %arg732_1, %arg733_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_193 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_193,), kwargs = {})
%view_946 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_193, [1, 4096]), kwargs = {})
%mm_193 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_946, %t_193), kwargs = {})
%view_947 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_193, [1, 1, 11008]), kwargs = {})
%silu_27 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_947,), kwargs = {})
%choose_qparams_per_token_asymmetric_194 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_474, torch.int8), kwargs = {})
%getitem_500 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_194, 0), kwargs = {})
%getitem_501 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_194, 1), kwargs = {})
%quantize_per_token_194 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_474, %getitem_500, %getitem_501, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_194 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_194, %getitem_500, %getitem_501, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_194 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg734_1, %arg735_1, %arg736_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_194 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_194,), kwargs = {})
%view_948 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_194, [1, 4096]), kwargs = {})
%mm_194 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_948, %t_194), kwargs = {})
%view_949 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_194, [1, 1, 11008]), kwargs = {})
%mul_475 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_27, %view_949), kwargs = {})
%choose_qparams_per_token_asymmetric_195 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_475, torch.int8), kwargs = {})
%getitem_502 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_195, 0), kwargs = {})
%getitem_503 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_195, 1), kwargs = {})
%quantize_per_token_195 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_475, %getitem_502, %getitem_503, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_195 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_195, %getitem_502, %getitem_503, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_195 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg737_1, %arg738_1, %arg739_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_195 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_195,), kwargs = {})
%view_950 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_195, [1, 11008]), kwargs = {})
%mm_195 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_950, %t_195), kwargs = {})
%view_951 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_195, [1, 1, 4096]), kwargs = {})
%add_195 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_193, %view_951), kwargs = {})
%mul_476 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_195, %add_195), kwargs = {})
%mean_56 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_476, [-1], True), kwargs = {})
%add_196 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_56, 1e-06), kwargs = {})
%rsqrt_56 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_196,), kwargs = {})
%mul_477 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_195, %rsqrt_56), kwargs = {})
%mul_478 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_477, %arg57_1), kwargs = {})
%choose_qparams_per_token_asymmetric_196 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_478, torch.int8), kwargs = {})
%getitem_504 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_196, 0), kwargs = {})
%getitem_505 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_196, 1), kwargs = {})
%quantize_per_token_196 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_478, %getitem_504, %getitem_505, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_196 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_196, %getitem_504, %getitem_505, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_196 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg740_1, %arg741_1, %arg742_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_196 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_196,), kwargs = {})
%view_952 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_196, [1, 4096]), kwargs = {})
%mm_196 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_952, %t_196), kwargs = {})
%view_953 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_196, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_197 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_478, torch.int8), kwargs = {})
%getitem_506 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_197, 0), kwargs = {})
%getitem_507 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_197, 1), kwargs = {})
%quantize_per_token_197 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_478, %getitem_506, %getitem_507, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_197 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_197, %getitem_506, %getitem_507, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_197 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg743_1, %arg744_1, %arg745_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_197 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_197,), kwargs = {})
%view_954 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_197, [1, 4096]), kwargs = {})
%mm_197 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_954, %t_197), kwargs = {})
%view_955 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_197, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_198 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_478, torch.int8), kwargs = {})
%getitem_508 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_198, 0), kwargs = {})
%getitem_509 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_198, 1), kwargs = {})
%quantize_per_token_198 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_478, %getitem_508, %getitem_509, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_198 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_198, %getitem_508, %getitem_509, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_198 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg746_1, %arg747_1, %arg748_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_198 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_198,), kwargs = {})
%view_956 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_198, [1, 4096]), kwargs = {})
%mm_198 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_956, %t_198), kwargs = {})
%view_957 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_198, [1, 1, 4096]), kwargs = {})
%view_958 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_953, [1, 1, 32, 128]), kwargs = {})
%view_959 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_955, [1, 1, 32, 128]), kwargs = {})
%view_960 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_957, [1, 1, 32, 128]), kwargs = {})
%view_961 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_958, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_56 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_961, -1), kwargs = {})
%getitem_510 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_56, 0), kwargs = {})
%getitem_511 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_56, 1), kwargs = {})
%view_962 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_959, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_57 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_962, -1), kwargs = {})
%getitem_512 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_57, 0), kwargs = {})
%getitem_513 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_57, 1), kwargs = {})
%view_963 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_964 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_479 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_510, %view_963), kwargs = {})
%mul_480 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_511, %view_964), kwargs = {})
%sub_56 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_479, %mul_480), kwargs = {})
%mul_481 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_510, %view_964), kwargs = {})
%mul_482 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_511, %view_963), kwargs = {})
%add_197 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_481, %mul_482), kwargs = {})
%mul_483 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_512, %view_963), kwargs = {})
%mul_484 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_513, %view_964), kwargs = {})
%sub_57 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_483, %mul_484), kwargs = {})
%mul_485 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_512, %view_964), kwargs = {})
%mul_486 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_513, %view_963), kwargs = {})
%add_198 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_485, %mul_486), kwargs = {})
%stack_56 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_56, %add_197], -1), kwargs = {})
%view_965 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_56, [1, 1, 32, 128]), kwargs = {})
%stack_57 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_57, %add_198], -1), kwargs = {})
%view_966 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_57, [1, 1, 32, 128]), kwargs = {})
%transpose_140 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_965, 1, 2), kwargs = {})
%transpose_141 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_966, 1, 2), kwargs = {})
%transpose_142 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_960, 1, 2), kwargs = {})
%slice_281 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg749_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_282 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_281, 1, 0, 9223372036854775807), kwargs = {})
%view_967 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_141, [32, 1, 128]), kwargs = {})
%index_put_56 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_282, [None, None, %arg840_1], %view_967), kwargs = {})
%slice_283 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg749_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_112 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_283, %index_put_56, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_113 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg749_1, %slice_scatter_112, 0, 0, 9223372036854775807), kwargs = {})
%slice_286 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg750_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_287 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_286, 1, 0, 9223372036854775807), kwargs = {})
%view_968 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_142, [32, 1, 128]), kwargs = {})
%index_put_57 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_287, [None, None, %arg840_1], %view_968), kwargs = {})
%slice_288 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg750_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_114 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_288, %index_put_57, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_115 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg750_1, %slice_scatter_114, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_168 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg751_1, 0), kwargs = {})
%unsqueeze_169 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_168, 1), kwargs = {})
%index_30 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_169, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_171 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_113, 2), kwargs = {})
%expand_225 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_171, [1, 32, 1, 128, 128]), kwargs = {})
%clone_56 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_225,), kwargs = {memory_format: torch.contiguous_format})
%view_969 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_56, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_173 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_115, 2), kwargs = {})
%expand_227 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_173, [1, 32, 1, 128, 128]), kwargs = {})
%clone_57 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_227,), kwargs = {memory_format: torch.contiguous_format})
%view_970 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_57, [1, 32, 128, 128]), kwargs = {})
%zeros_like_28 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_30,), kwargs = {dtype: torch.float32})
%logical_not_28 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_30,), kwargs = {})
%masked_fill_28 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_28, %logical_not_28, -inf), kwargs = {})
%mul_487 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_140, 0.29730177875068026), kwargs = {})
%transpose_143 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_969, -2, -1), kwargs = {})
%mul_488 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_143, 0.29730177875068026), kwargs = {})
%expand_228 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_487, [1, 32, 1, 128]), kwargs = {})
%view_971 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_228, [32, 1, 128]), kwargs = {})
%expand_229 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_488, [1, 32, 128, 128]), kwargs = {})
%view_972 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_229, [32, 128, 128]), kwargs = {})
%bmm_56 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_971, %view_972), kwargs = {})
%view_973 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_56, [1, 32, 1, 128]), kwargs = {})
%add_199 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_973, %masked_fill_28), kwargs = {})
%_softmax_28 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_199, -1, False), kwargs = {})
%expand_230 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_28, [1, 32, 1, 128]), kwargs = {})
%view_974 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_230, [32, 1, 128]), kwargs = {})
%expand_231 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_970, [1, 32, 128, 128]), kwargs = {})
%view_975 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_231, [32, 128, 128]), kwargs = {})
%bmm_57 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_974, %view_975), kwargs = {})
%view_976 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_57, [1, 32, 1, 128]), kwargs = {})
%transpose_144 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_976, 1, 2), kwargs = {})
%view_977 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_144, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_199 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_977, torch.int8), kwargs = {})
%getitem_514 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_199, 0), kwargs = {})
%getitem_515 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_199, 1), kwargs = {})
%quantize_per_token_199 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_977, %getitem_514, %getitem_515, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_199 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_199, %getitem_514, %getitem_515, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_199 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg752_1, %arg753_1, %arg754_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_199 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_199,), kwargs = {})
%view_978 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_199, [1, 4096]), kwargs = {})
%mm_199 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_978, %t_199), kwargs = {})
%view_979 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_199, [1, 1, 4096]), kwargs = {})
%add_200 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_195, %view_979), kwargs = {})
%mul_489 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_200, %add_200), kwargs = {})
%mean_57 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_489, [-1], True), kwargs = {})
%add_201 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_57, 1e-06), kwargs = {})
%rsqrt_57 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_201,), kwargs = {})
%mul_490 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_200, %rsqrt_57), kwargs = {})
%mul_491 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_490, %arg58_1), kwargs = {})
%choose_qparams_per_token_asymmetric_200 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_491, torch.int8), kwargs = {})
%getitem_516 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_200, 0), kwargs = {})
%getitem_517 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_200, 1), kwargs = {})
%quantize_per_token_200 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_491, %getitem_516, %getitem_517, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_200 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_200, %getitem_516, %getitem_517, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_200 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg755_1, %arg756_1, %arg757_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_200 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_200,), kwargs = {})
%view_980 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_200, [1, 4096]), kwargs = {})
%mm_200 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_980, %t_200), kwargs = {})
%view_981 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_200, [1, 1, 11008]), kwargs = {})
%silu_28 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_981,), kwargs = {})
%choose_qparams_per_token_asymmetric_201 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_491, torch.int8), kwargs = {})
%getitem_518 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_201, 0), kwargs = {})
%getitem_519 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_201, 1), kwargs = {})
%quantize_per_token_201 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_491, %getitem_518, %getitem_519, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_201 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_201, %getitem_518, %getitem_519, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_201 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg758_1, %arg759_1, %arg760_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_201 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_201,), kwargs = {})
%view_982 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_201, [1, 4096]), kwargs = {})
%mm_201 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_982, %t_201), kwargs = {})
%view_983 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_201, [1, 1, 11008]), kwargs = {})
%mul_492 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_28, %view_983), kwargs = {})
%choose_qparams_per_token_asymmetric_202 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_492, torch.int8), kwargs = {})
%getitem_520 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_202, 0), kwargs = {})
%getitem_521 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_202, 1), kwargs = {})
%quantize_per_token_202 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_492, %getitem_520, %getitem_521, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_202 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_202, %getitem_520, %getitem_521, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_202 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg761_1, %arg762_1, %arg763_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_202 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_202,), kwargs = {})
%view_984 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_202, [1, 11008]), kwargs = {})
%mm_202 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_984, %t_202), kwargs = {})
%view_985 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_202, [1, 1, 4096]), kwargs = {})
%add_202 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_200, %view_985), kwargs = {})
%mul_493 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_202, %add_202), kwargs = {})
%mean_58 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_493, [-1], True), kwargs = {})
%add_203 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_58, 1e-06), kwargs = {})
%rsqrt_58 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_203,), kwargs = {})
%mul_494 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_202, %rsqrt_58), kwargs = {})
%mul_495 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_494, %arg59_1), kwargs = {})
%choose_qparams_per_token_asymmetric_203 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_495, torch.int8), kwargs = {})
%getitem_522 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_203, 0), kwargs = {})
%getitem_523 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_203, 1), kwargs = {})
%quantize_per_token_203 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_495, %getitem_522, %getitem_523, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_203 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_203, %getitem_522, %getitem_523, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_203 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg764_1, %arg765_1, %arg766_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_203 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_203,), kwargs = {})
%view_986 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_203, [1, 4096]), kwargs = {})
%mm_203 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_986, %t_203), kwargs = {})
%view_987 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_203, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_204 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_495, torch.int8), kwargs = {})
%getitem_524 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_204, 0), kwargs = {})
%getitem_525 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_204, 1), kwargs = {})
%quantize_per_token_204 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_495, %getitem_524, %getitem_525, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_204 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_204, %getitem_524, %getitem_525, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_204 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg767_1, %arg768_1, %arg769_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_204 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_204,), kwargs = {})
%view_988 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_204, [1, 4096]), kwargs = {})
%mm_204 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_988, %t_204), kwargs = {})
%view_989 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_204, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_205 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_495, torch.int8), kwargs = {})
%getitem_526 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_205, 0), kwargs = {})
%getitem_527 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_205, 1), kwargs = {})
%quantize_per_token_205 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_495, %getitem_526, %getitem_527, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_205 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_205, %getitem_526, %getitem_527, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_205 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg770_1, %arg771_1, %arg772_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_205 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_205,), kwargs = {})
%view_990 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_205, [1, 4096]), kwargs = {})
%mm_205 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_990, %t_205), kwargs = {})
%view_991 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_205, [1, 1, 4096]), kwargs = {})
%view_992 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_987, [1, 1, 32, 128]), kwargs = {})
%view_993 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_989, [1, 1, 32, 128]), kwargs = {})
%view_994 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_991, [1, 1, 32, 128]), kwargs = {})
%view_995 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_992, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_58 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_995, -1), kwargs = {})
%getitem_528 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_58, 0), kwargs = {})
%getitem_529 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_58, 1), kwargs = {})
%view_996 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_993, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_59 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_996, -1), kwargs = {})
%getitem_530 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_59, 0), kwargs = {})
%getitem_531 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_59, 1), kwargs = {})
%view_997 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_998 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_496 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_528, %view_997), kwargs = {})
%mul_497 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_529, %view_998), kwargs = {})
%sub_58 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_496, %mul_497), kwargs = {})
%mul_498 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_528, %view_998), kwargs = {})
%mul_499 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_529, %view_997), kwargs = {})
%add_204 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_498, %mul_499), kwargs = {})
%mul_500 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_530, %view_997), kwargs = {})
%mul_501 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_531, %view_998), kwargs = {})
%sub_59 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_500, %mul_501), kwargs = {})
%mul_502 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_530, %view_998), kwargs = {})
%mul_503 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_531, %view_997), kwargs = {})
%add_205 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_502, %mul_503), kwargs = {})
%stack_58 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_58, %add_204], -1), kwargs = {})
%view_999 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_58, [1, 1, 32, 128]), kwargs = {})
%stack_59 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_59, %add_205], -1), kwargs = {})
%view_1000 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_59, [1, 1, 32, 128]), kwargs = {})
%transpose_145 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_999, 1, 2), kwargs = {})
%transpose_146 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1000, 1, 2), kwargs = {})
%transpose_147 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_994, 1, 2), kwargs = {})
%slice_291 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg773_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_292 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_291, 1, 0, 9223372036854775807), kwargs = {})
%view_1001 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_146, [32, 1, 128]), kwargs = {})
%index_put_58 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_292, [None, None, %arg840_1], %view_1001), kwargs = {})
%slice_293 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg773_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_116 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_293, %index_put_58, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_117 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg773_1, %slice_scatter_116, 0, 0, 9223372036854775807), kwargs = {})
%slice_296 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg774_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_297 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_296, 1, 0, 9223372036854775807), kwargs = {})
%view_1002 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_147, [32, 1, 128]), kwargs = {})
%index_put_59 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_297, [None, None, %arg840_1], %view_1002), kwargs = {})
%slice_298 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg774_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_118 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_298, %index_put_59, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_119 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg774_1, %slice_scatter_118, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_174 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg775_1, 0), kwargs = {})
%unsqueeze_175 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_174, 1), kwargs = {})
%index_31 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_175, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_177 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_117, 2), kwargs = {})
%expand_233 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_177, [1, 32, 1, 128, 128]), kwargs = {})
%clone_58 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_233,), kwargs = {memory_format: torch.contiguous_format})
%view_1003 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_58, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_179 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_119, 2), kwargs = {})
%expand_235 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_179, [1, 32, 1, 128, 128]), kwargs = {})
%clone_59 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_235,), kwargs = {memory_format: torch.contiguous_format})
%view_1004 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_59, [1, 32, 128, 128]), kwargs = {})
%zeros_like_29 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_31,), kwargs = {dtype: torch.float32})
%logical_not_29 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_31,), kwargs = {})
%masked_fill_29 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_29, %logical_not_29, -inf), kwargs = {})
%mul_504 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_145, 0.29730177875068026), kwargs = {})
%transpose_148 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1003, -2, -1), kwargs = {})
%mul_505 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_148, 0.29730177875068026), kwargs = {})
%expand_236 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_504, [1, 32, 1, 128]), kwargs = {})
%view_1005 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_236, [32, 1, 128]), kwargs = {})
%expand_237 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_505, [1, 32, 128, 128]), kwargs = {})
%view_1006 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_237, [32, 128, 128]), kwargs = {})
%bmm_58 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_1005, %view_1006), kwargs = {})
%view_1007 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_58, [1, 32, 1, 128]), kwargs = {})
%add_206 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_1007, %masked_fill_29), kwargs = {})
%_softmax_29 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_206, -1, False), kwargs = {})
%expand_238 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_29, [1, 32, 1, 128]), kwargs = {})
%view_1008 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_238, [32, 1, 128]), kwargs = {})
%expand_239 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_1004, [1, 32, 128, 128]), kwargs = {})
%view_1009 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_239, [32, 128, 128]), kwargs = {})
%bmm_59 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_1008, %view_1009), kwargs = {})
%view_1010 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_59, [1, 32, 1, 128]), kwargs = {})
%transpose_149 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1010, 1, 2), kwargs = {})
%view_1011 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_149, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_206 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_1011, torch.int8), kwargs = {})
%getitem_532 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_206, 0), kwargs = {})
%getitem_533 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_206, 1), kwargs = {})
%quantize_per_token_206 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_1011, %getitem_532, %getitem_533, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_206 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_206, %getitem_532, %getitem_533, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_206 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg776_1, %arg777_1, %arg778_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_206 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_206,), kwargs = {})
%view_1012 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_206, [1, 4096]), kwargs = {})
%mm_206 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1012, %t_206), kwargs = {})
%view_1013 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_206, [1, 1, 4096]), kwargs = {})
%add_207 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_202, %view_1013), kwargs = {})
%mul_506 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_207, %add_207), kwargs = {})
%mean_59 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_506, [-1], True), kwargs = {})
%add_208 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_59, 1e-06), kwargs = {})
%rsqrt_59 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_208,), kwargs = {})
%mul_507 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_207, %rsqrt_59), kwargs = {})
%mul_508 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_507, %arg60_1), kwargs = {})
%choose_qparams_per_token_asymmetric_207 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_508, torch.int8), kwargs = {})
%getitem_534 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_207, 0), kwargs = {})
%getitem_535 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_207, 1), kwargs = {})
%quantize_per_token_207 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_508, %getitem_534, %getitem_535, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_207 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_207, %getitem_534, %getitem_535, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_207 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg779_1, %arg780_1, %arg781_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_207 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_207,), kwargs = {})
%view_1014 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_207, [1, 4096]), kwargs = {})
%mm_207 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1014, %t_207), kwargs = {})
%view_1015 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_207, [1, 1, 11008]), kwargs = {})
%silu_29 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_1015,), kwargs = {})
%choose_qparams_per_token_asymmetric_208 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_508, torch.int8), kwargs = {})
%getitem_536 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_208, 0), kwargs = {})
%getitem_537 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_208, 1), kwargs = {})
%quantize_per_token_208 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_508, %getitem_536, %getitem_537, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_208 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_208, %getitem_536, %getitem_537, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_208 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg782_1, %arg783_1, %arg784_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_208 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_208,), kwargs = {})
%view_1016 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_208, [1, 4096]), kwargs = {})
%mm_208 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1016, %t_208), kwargs = {})
%view_1017 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_208, [1, 1, 11008]), kwargs = {})
%mul_509 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_29, %view_1017), kwargs = {})
%choose_qparams_per_token_asymmetric_209 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_509, torch.int8), kwargs = {})
%getitem_538 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_209, 0), kwargs = {})
%getitem_539 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_209, 1), kwargs = {})
%quantize_per_token_209 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_509, %getitem_538, %getitem_539, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_209 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_209, %getitem_538, %getitem_539, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_209 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg785_1, %arg786_1, %arg787_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_209 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_209,), kwargs = {})
%view_1018 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_209, [1, 11008]), kwargs = {})
%mm_209 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1018, %t_209), kwargs = {})
%view_1019 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_209, [1, 1, 4096]), kwargs = {})
%add_209 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_207, %view_1019), kwargs = {})
%mul_510 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_209, %add_209), kwargs = {})
%mean_60 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_510, [-1], True), kwargs = {})
%add_210 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_60, 1e-06), kwargs = {})
%rsqrt_60 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_210,), kwargs = {})
%mul_511 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_209, %rsqrt_60), kwargs = {})
%mul_512 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_511, %arg61_1), kwargs = {})
%choose_qparams_per_token_asymmetric_210 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_512, torch.int8), kwargs = {})
%getitem_540 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_210, 0), kwargs = {})
%getitem_541 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_210, 1), kwargs = {})
%quantize_per_token_210 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_512, %getitem_540, %getitem_541, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_210 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_210, %getitem_540, %getitem_541, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_210 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg788_1, %arg789_1, %arg790_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_210 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_210,), kwargs = {})
%view_1020 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_210, [1, 4096]), kwargs = {})
%mm_210 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1020, %t_210), kwargs = {})
%view_1021 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_210, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_211 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_512, torch.int8), kwargs = {})
%getitem_542 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_211, 0), kwargs = {})
%getitem_543 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_211, 1), kwargs = {})
%quantize_per_token_211 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_512, %getitem_542, %getitem_543, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_211 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_211, %getitem_542, %getitem_543, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_211 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg791_1, %arg792_1, %arg793_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_211 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_211,), kwargs = {})
%view_1022 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_211, [1, 4096]), kwargs = {})
%mm_211 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1022, %t_211), kwargs = {})
%view_1023 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_211, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_212 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_512, torch.int8), kwargs = {})
%getitem_544 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_212, 0), kwargs = {})
%getitem_545 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_212, 1), kwargs = {})
%quantize_per_token_212 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_512, %getitem_544, %getitem_545, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_212 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_212, %getitem_544, %getitem_545, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_212 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg794_1, %arg795_1, %arg796_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_212 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_212,), kwargs = {})
%view_1024 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_212, [1, 4096]), kwargs = {})
%mm_212 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1024, %t_212), kwargs = {})
%view_1025 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_212, [1, 1, 4096]), kwargs = {})
%view_1026 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_1021, [1, 1, 32, 128]), kwargs = {})
%view_1027 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_1023, [1, 1, 32, 128]), kwargs = {})
%view_1028 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_1025, [1, 1, 32, 128]), kwargs = {})
%view_1029 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_1026, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_60 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_1029, -1), kwargs = {})
%getitem_546 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_60, 0), kwargs = {})
%getitem_547 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_60, 1), kwargs = {})
%view_1030 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_1027, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_61 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_1030, -1), kwargs = {})
%getitem_548 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_61, 0), kwargs = {})
%getitem_549 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_61, 1), kwargs = {})
%view_1031 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_1032 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_513 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_546, %view_1031), kwargs = {})
%mul_514 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_547, %view_1032), kwargs = {})
%sub_60 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_513, %mul_514), kwargs = {})
%mul_515 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_546, %view_1032), kwargs = {})
%mul_516 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_547, %view_1031), kwargs = {})
%add_211 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_515, %mul_516), kwargs = {})
%mul_517 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_548, %view_1031), kwargs = {})
%mul_518 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_549, %view_1032), kwargs = {})
%sub_61 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_517, %mul_518), kwargs = {})
%mul_519 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_548, %view_1032), kwargs = {})
%mul_520 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_549, %view_1031), kwargs = {})
%add_212 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_519, %mul_520), kwargs = {})
%stack_60 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_60, %add_211], -1), kwargs = {})
%view_1033 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_60, [1, 1, 32, 128]), kwargs = {})
%stack_61 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_61, %add_212], -1), kwargs = {})
%view_1034 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_61, [1, 1, 32, 128]), kwargs = {})
%transpose_150 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1033, 1, 2), kwargs = {})
%transpose_151 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1034, 1, 2), kwargs = {})
%transpose_152 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1028, 1, 2), kwargs = {})
%slice_301 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg797_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_302 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_301, 1, 0, 9223372036854775807), kwargs = {})
%view_1035 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_151, [32, 1, 128]), kwargs = {})
%index_put_60 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_302, [None, None, %arg840_1], %view_1035), kwargs = {})
%slice_303 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg797_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_120 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_303, %index_put_60, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_121 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg797_1, %slice_scatter_120, 0, 0, 9223372036854775807), kwargs = {})
%slice_306 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg798_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_307 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_306, 1, 0, 9223372036854775807), kwargs = {})
%view_1036 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_152, [32, 1, 128]), kwargs = {})
%index_put_61 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_307, [None, None, %arg840_1], %view_1036), kwargs = {})
%slice_308 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg798_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_122 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_308, %index_put_61, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_123 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg798_1, %slice_scatter_122, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_180 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg799_1, 0), kwargs = {})
%unsqueeze_181 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_180, 1), kwargs = {})
%index_32 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_181, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_183 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_121, 2), kwargs = {})
%expand_241 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_183, [1, 32, 1, 128, 128]), kwargs = {})
%clone_60 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_241,), kwargs = {memory_format: torch.contiguous_format})
%view_1037 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_60, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_185 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_123, 2), kwargs = {})
%expand_243 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_185, [1, 32, 1, 128, 128]), kwargs = {})
%clone_61 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_243,), kwargs = {memory_format: torch.contiguous_format})
%view_1038 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_61, [1, 32, 128, 128]), kwargs = {})
%zeros_like_30 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_32,), kwargs = {dtype: torch.float32})
%logical_not_30 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_32,), kwargs = {})
%masked_fill_30 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_30, %logical_not_30, -inf), kwargs = {})
%mul_521 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_150, 0.29730177875068026), kwargs = {})
%transpose_153 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1037, -2, -1), kwargs = {})
%mul_522 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_153, 0.29730177875068026), kwargs = {})
%expand_244 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_521, [1, 32, 1, 128]), kwargs = {})
%view_1039 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_244, [32, 1, 128]), kwargs = {})
%expand_245 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_522, [1, 32, 128, 128]), kwargs = {})
%view_1040 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_245, [32, 128, 128]), kwargs = {})
%bmm_60 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_1039, %view_1040), kwargs = {})
%view_1041 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_60, [1, 32, 1, 128]), kwargs = {})
%add_213 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_1041, %masked_fill_30), kwargs = {})
%_softmax_30 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_213, -1, False), kwargs = {})
%expand_246 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_30, [1, 32, 1, 128]), kwargs = {})
%view_1042 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_246, [32, 1, 128]), kwargs = {})
%expand_247 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_1038, [1, 32, 128, 128]), kwargs = {})
%view_1043 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_247, [32, 128, 128]), kwargs = {})
%bmm_61 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_1042, %view_1043), kwargs = {})
%view_1044 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_61, [1, 32, 1, 128]), kwargs = {})
%transpose_154 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1044, 1, 2), kwargs = {})
%view_1045 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_154, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_213 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_1045, torch.int8), kwargs = {})
%getitem_550 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_213, 0), kwargs = {})
%getitem_551 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_213, 1), kwargs = {})
%quantize_per_token_213 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_1045, %getitem_550, %getitem_551, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_213 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_213, %getitem_550, %getitem_551, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_213 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg800_1, %arg801_1, %arg802_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_213 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_213,), kwargs = {})
%view_1046 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_213, [1, 4096]), kwargs = {})
%mm_213 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1046, %t_213), kwargs = {})
%view_1047 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_213, [1, 1, 4096]), kwargs = {})
%add_214 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_209, %view_1047), kwargs = {})
%mul_523 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_214, %add_214), kwargs = {})
%mean_61 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_523, [-1], True), kwargs = {})
%add_215 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_61, 1e-06), kwargs = {})
%rsqrt_61 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_215,), kwargs = {})
%mul_524 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_214, %rsqrt_61), kwargs = {})
%mul_525 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_524, %arg62_1), kwargs = {})
%choose_qparams_per_token_asymmetric_214 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_525, torch.int8), kwargs = {})
%getitem_552 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_214, 0), kwargs = {})
%getitem_553 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_214, 1), kwargs = {})
%quantize_per_token_214 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_525, %getitem_552, %getitem_553, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_214 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_214, %getitem_552, %getitem_553, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_214 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg803_1, %arg804_1, %arg805_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_214 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_214,), kwargs = {})
%view_1048 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_214, [1, 4096]), kwargs = {})
%mm_214 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1048, %t_214), kwargs = {})
%view_1049 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_214, [1, 1, 11008]), kwargs = {})
%silu_30 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_1049,), kwargs = {})
%choose_qparams_per_token_asymmetric_215 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_525, torch.int8), kwargs = {})
%getitem_554 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_215, 0), kwargs = {})
%getitem_555 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_215, 1), kwargs = {})
%quantize_per_token_215 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_525, %getitem_554, %getitem_555, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_215 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_215, %getitem_554, %getitem_555, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_215 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg806_1, %arg807_1, %arg808_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_215 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_215,), kwargs = {})
%view_1050 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_215, [1, 4096]), kwargs = {})
%mm_215 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1050, %t_215), kwargs = {})
%view_1051 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_215, [1, 1, 11008]), kwargs = {})
%mul_526 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_30, %view_1051), kwargs = {})
%choose_qparams_per_token_asymmetric_216 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_526, torch.int8), kwargs = {})
%getitem_556 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_216, 0), kwargs = {})
%getitem_557 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_216, 1), kwargs = {})
%quantize_per_token_216 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_526, %getitem_556, %getitem_557, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_216 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_216, %getitem_556, %getitem_557, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_216 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg809_1, %arg810_1, %arg811_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_216 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_216,), kwargs = {})
%view_1052 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_216, [1, 11008]), kwargs = {})
%mm_216 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1052, %t_216), kwargs = {})
%view_1053 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_216, [1, 1, 4096]), kwargs = {})
%add_216 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_214, %view_1053), kwargs = {})
%mul_527 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_216, %add_216), kwargs = {})
%mean_62 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_527, [-1], True), kwargs = {})
%add_217 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_62, 1e-06), kwargs = {})
%rsqrt_62 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_217,), kwargs = {})
%mul_528 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_216, %rsqrt_62), kwargs = {})
%mul_529 : [num_users=6] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_528, %arg63_1), kwargs = {})
%choose_qparams_per_token_asymmetric_217 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_529, torch.int8), kwargs = {})
%getitem_558 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_217, 0), kwargs = {})
%getitem_559 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_217, 1), kwargs = {})
%quantize_per_token_217 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_529, %getitem_558, %getitem_559, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_217 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_217, %getitem_558, %getitem_559, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_217 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg812_1, %arg813_1, %arg814_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_217 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_217,), kwargs = {})
%view_1054 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_217, [1, 4096]), kwargs = {})
%mm_217 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1054, %t_217), kwargs = {})
%view_1055 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_217, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_218 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_529, torch.int8), kwargs = {})
%getitem_560 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_218, 0), kwargs = {})
%getitem_561 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_218, 1), kwargs = {})
%quantize_per_token_218 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_529, %getitem_560, %getitem_561, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_218 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_218, %getitem_560, %getitem_561, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_218 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg815_1, %arg816_1, %arg817_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_218 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_218,), kwargs = {})
%view_1056 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_218, [1, 4096]), kwargs = {})
%mm_218 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1056, %t_218), kwargs = {})
%view_1057 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_218, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_219 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_529, torch.int8), kwargs = {})
%getitem_562 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_219, 0), kwargs = {})
%getitem_563 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_219, 1), kwargs = {})
%quantize_per_token_219 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_529, %getitem_562, %getitem_563, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_219 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_219, %getitem_562, %getitem_563, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_219 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg818_1, %arg819_1, %arg820_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_219 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_219,), kwargs = {})
%view_1058 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_219, [1, 4096]), kwargs = {})
%mm_219 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1058, %t_219), kwargs = {})
%view_1059 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_219, [1, 1, 4096]), kwargs = {})
%view_1060 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_1055, [1, 1, 32, 128]), kwargs = {})
%view_1061 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_1057, [1, 1, 32, 128]), kwargs = {})
%view_1062 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_1059, [1, 1, 32, 128]), kwargs = {})
%view_1063 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_1060, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_62 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_1063, -1), kwargs = {})
%getitem_564 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_62, 0), kwargs = {})
%getitem_565 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_62, 1), kwargs = {})
%view_1064 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%view_1061, [1, 1, 32, -1, 2]), kwargs = {})
%unbind_63 : [num_users=2] = call_function[target=torch.ops.aten.unbind.int](args = (%view_1064, -1), kwargs = {})
%getitem_566 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_63, 0), kwargs = {})
%getitem_567 : [num_users=2] = call_function[target=operator.getitem](args = (%unbind_63, 1), kwargs = {})
%view_1065 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index, [1, 1, 1, 64]), kwargs = {})
%view_1066 : [num_users=4] = call_function[target=torch.ops.aten.view.default](args = (%index_1, [1, 1, 1, 64]), kwargs = {})
%mul_530 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_564, %view_1065), kwargs = {})
%mul_531 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_565, %view_1066), kwargs = {})
%sub_62 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_530, %mul_531), kwargs = {})
%mul_532 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_564, %view_1066), kwargs = {})
%mul_533 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_565, %view_1065), kwargs = {})
%add_218 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_532, %mul_533), kwargs = {})
%mul_534 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_566, %view_1065), kwargs = {})
%mul_535 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_567, %view_1066), kwargs = {})
%sub_63 : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%mul_534, %mul_535), kwargs = {})
%mul_536 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_566, %view_1066), kwargs = {})
%mul_537 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%getitem_567, %view_1065), kwargs = {})
%add_219 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mul_536, %mul_537), kwargs = {})
%stack_62 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_62, %add_218], -1), kwargs = {})
%view_1067 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_62, [1, 1, 32, 128]), kwargs = {})
%stack_63 : [num_users=1] = call_function[target=torch.ops.aten.stack.default](args = ([%sub_63, %add_219], -1), kwargs = {})
%view_1068 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%stack_63, [1, 1, 32, 128]), kwargs = {})
%transpose_155 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1067, 1, 2), kwargs = {})
%transpose_156 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1068, 1, 2), kwargs = {})
%transpose_157 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1062, 1, 2), kwargs = {})
%slice_311 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg821_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_312 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_311, 1, 0, 9223372036854775807), kwargs = {})
%view_1069 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_156, [32, 1, 128]), kwargs = {})
%index_put_62 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_312, [None, None, %arg840_1], %view_1069), kwargs = {})
%slice_313 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg821_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_124 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_313, %index_put_62, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_125 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg821_1, %slice_scatter_124, 0, 0, 9223372036854775807), kwargs = {})
%slice_316 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg822_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_317 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%slice_316, 1, 0, 9223372036854775807), kwargs = {})
%view_1070 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%transpose_157, [32, 1, 128]), kwargs = {})
%index_put_63 : [num_users=1] = call_function[target=torch.ops.aten.index_put.default](args = (%slice_317, [None, None, %arg840_1], %view_1070), kwargs = {})
%slice_318 : [num_users=1] = call_function[target=torch.ops.aten.slice.Tensor](args = (%arg822_1, 0, 0, 9223372036854775807), kwargs = {})
%slice_scatter_126 : [num_users=1] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%slice_318, %index_put_63, 1, 0, 9223372036854775807), kwargs = {})
%slice_scatter_127 : [num_users=2] = call_function[target=torch.ops.aten.slice_scatter.default](args = (%arg822_1, %slice_scatter_126, 0, 0, 9223372036854775807), kwargs = {})
%unsqueeze_186 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%arg823_1, 0), kwargs = {})
%unsqueeze_187 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%unsqueeze_186, 1), kwargs = {})
%index_33 : [num_users=2] = call_function[target=torch.ops.aten.index.Tensor](args = (%unsqueeze_187, [None, None, %arg840_1]), kwargs = {})
%unsqueeze_189 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_125, 2), kwargs = {})
%expand_249 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_189, [1, 32, 1, 128, 128]), kwargs = {})
%clone_62 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_249,), kwargs = {memory_format: torch.contiguous_format})
%view_1071 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_62, [1, 32, 128, 128]), kwargs = {})
%unsqueeze_191 : [num_users=1] = call_function[target=torch.ops.aten.unsqueeze.default](args = (%slice_scatter_127, 2), kwargs = {})
%expand_251 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%unsqueeze_191, [1, 32, 1, 128, 128]), kwargs = {})
%clone_63 : [num_users=1] = call_function[target=torch.ops.aten.clone.default](args = (%expand_251,), kwargs = {memory_format: torch.contiguous_format})
%view_1072 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%clone_63, [1, 32, 128, 128]), kwargs = {})
%zeros_like_31 : [num_users=1] = call_function[target=torch.ops.aten.zeros_like.default](args = (%index_33,), kwargs = {dtype: torch.float32})
%logical_not_31 : [num_users=1] = call_function[target=torch.ops.aten.logical_not.default](args = (%index_33,), kwargs = {})
%masked_fill_31 : [num_users=1] = call_function[target=torch.ops.aten.masked_fill.Scalar](args = (%zeros_like_31, %logical_not_31, -inf), kwargs = {})
%mul_538 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_155, 0.29730177875068026), kwargs = {})
%transpose_158 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1071, -2, -1), kwargs = {})
%mul_539 : [num_users=1] = call_function[target=torch.ops.aten.mul.Scalar](args = (%transpose_158, 0.29730177875068026), kwargs = {})
%expand_252 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_538, [1, 32, 1, 128]), kwargs = {})
%view_1073 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_252, [32, 1, 128]), kwargs = {})
%expand_253 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%mul_539, [1, 32, 128, 128]), kwargs = {})
%view_1074 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_253, [32, 128, 128]), kwargs = {})
%bmm_62 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_1073, %view_1074), kwargs = {})
%view_1075 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_62, [1, 32, 1, 128]), kwargs = {})
%add_220 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%view_1075, %masked_fill_31), kwargs = {})
%_softmax_31 : [num_users=1] = call_function[target=torch.ops.aten._softmax.default](args = (%add_220, -1, False), kwargs = {})
%expand_254 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%_softmax_31, [1, 32, 1, 128]), kwargs = {})
%view_1076 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_254, [32, 1, 128]), kwargs = {})
%expand_255 : [num_users=1] = call_function[target=torch.ops.aten.expand.default](args = (%view_1072, [1, 32, 128, 128]), kwargs = {})
%view_1077 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%expand_255, [32, 128, 128]), kwargs = {})
%bmm_63 : [num_users=1] = call_function[target=torch.ops.aten.bmm.default](args = (%view_1076, %view_1077), kwargs = {})
%view_1078 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%bmm_63, [1, 32, 1, 128]), kwargs = {})
%transpose_159 : [num_users=1] = call_function[target=torch.ops.aten.transpose.int](args = (%view_1078, 1, 2), kwargs = {})
%view_1079 : [num_users=2] = call_function[target=torch.ops.aten.view.default](args = (%transpose_159, [1, 1, 4096]), kwargs = {})
%choose_qparams_per_token_asymmetric_220 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%view_1079, torch.int8), kwargs = {})
%getitem_568 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_220, 0), kwargs = {})
%getitem_569 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_220, 1), kwargs = {})
%quantize_per_token_220 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%view_1079, %getitem_568, %getitem_569, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_220 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_220, %getitem_568, %getitem_569, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_220 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg824_1, %arg825_1, %arg826_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_220 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_220,), kwargs = {})
%view_1080 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_220, [1, 4096]), kwargs = {})
%mm_220 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1080, %t_220), kwargs = {})
%view_1081 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_220, [1, 1, 4096]), kwargs = {})
%add_221 : [num_users=3] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_216, %view_1081), kwargs = {})
%mul_540 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_221, %add_221), kwargs = {})
%mean_63 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_540, [-1], True), kwargs = {})
%add_222 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_63, 1e-06), kwargs = {})
%rsqrt_63 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_222,), kwargs = {})
%mul_541 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_221, %rsqrt_63), kwargs = {})
%mul_542 : [num_users=4] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_541, %arg64_1), kwargs = {})
%choose_qparams_per_token_asymmetric_221 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_542, torch.int8), kwargs = {})
%getitem_570 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_221, 0), kwargs = {})
%getitem_571 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_221, 1), kwargs = {})
%quantize_per_token_221 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_542, %getitem_570, %getitem_571, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_221 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_221, %getitem_570, %getitem_571, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_221 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg827_1, %arg828_1, %arg829_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_221 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_221,), kwargs = {})
%view_1082 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_221, [1, 4096]), kwargs = {})
%mm_221 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1082, %t_221), kwargs = {})
%view_1083 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_221, [1, 1, 11008]), kwargs = {})
%silu_31 : [num_users=1] = call_function[target=torch.ops.aten.silu.default](args = (%view_1083,), kwargs = {})
%choose_qparams_per_token_asymmetric_222 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_542, torch.int8), kwargs = {})
%getitem_572 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_222, 0), kwargs = {})
%getitem_573 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_222, 1), kwargs = {})
%quantize_per_token_222 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_542, %getitem_572, %getitem_573, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_222 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_222, %getitem_572, %getitem_573, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_222 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg830_1, %arg831_1, %arg832_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_222 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_222,), kwargs = {})
%view_1084 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_222, [1, 4096]), kwargs = {})
%mm_222 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1084, %t_222), kwargs = {})
%view_1085 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_222, [1, 1, 11008]), kwargs = {})
%mul_543 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%silu_31, %view_1085), kwargs = {})
%choose_qparams_per_token_asymmetric_223 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_543, torch.int8), kwargs = {})
%getitem_574 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_223, 0), kwargs = {})
%getitem_575 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_223, 1), kwargs = {})
%quantize_per_token_223 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_543, %getitem_574, %getitem_575, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_223 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_223, %getitem_574, %getitem_575, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_223 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg833_1, %arg834_1, %arg835_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_223 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_223,), kwargs = {})
%view_1086 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_223, [1, 11008]), kwargs = {})
%mm_223 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1086, %t_223), kwargs = {})
%view_1087 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_223, [1, 1, 4096]), kwargs = {})
%add_223 : [num_users=2] = call_function[target=torch.ops.aten.add.Tensor](args = (%add_221, %view_1087), kwargs = {})
%mul_544 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_223, %add_223), kwargs = {})
%mean_64 : [num_users=1] = call_function[target=torch.ops.aten.mean.dim](args = (%mul_544, [-1], True), kwargs = {})
%add_224 : [num_users=1] = call_function[target=torch.ops.aten.add.Tensor](args = (%mean_64, 1e-06), kwargs = {})
%rsqrt_64 : [num_users=1] = call_function[target=torch.ops.aten.rsqrt.default](args = (%add_224,), kwargs = {})
%mul_545 : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%add_223, %rsqrt_64), kwargs = {})
%mul_546 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%mul_545, %arg65_1), kwargs = {})
%choose_qparams_per_token_asymmetric_224 : [num_users=2] = call_function[target=torch.ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%mul_546, torch.int8), kwargs = {})
%getitem_576 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_224, 0), kwargs = {})
%getitem_577 : [num_users=2] = call_function[target=operator.getitem](args = (%choose_qparams_per_token_asymmetric_224, 1), kwargs = {})
%quantize_per_token_224 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.quantize_per_token.default](args = (%mul_546, %getitem_576, %getitem_577, -128, 127, torch.int8), kwargs = {})
%dequantize_per_token_224 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_token.default](args = (%quantize_per_token_224, %getitem_576, %getitem_577, -128, 127, torch.int8, torch.float32), kwargs = {})
%dequantize_per_channel_group_224 : [num_users=1] = call_function[target=torch.ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg836_1, %arg837_1, %arg838_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%t_224 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%dequantize_per_channel_group_224,), kwargs = {})
%view_1088 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%dequantize_per_token_224, [1, 4096]), kwargs = {})
%mm_224 : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%view_1088, %t_224), kwargs = {})
%view_1089 : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%mm_224, [1, 1, 32000]), kwargs = {})
return (slice_scatter_1, slice_scatter_3, slice_scatter_5, slice_scatter_7, slice_scatter_9, slice_scatter_11, slice_scatter_13, slice_scatter_15, slice_scatter_17, slice_scatter_19, slice_scatter_21, slice_scatter_23, slice_scatter_25, slice_scatter_27, slice_scatter_29, slice_scatter_31, slice_scatter_33, slice_scatter_35, slice_scatter_37, slice_scatter_39, slice_scatter_41, slice_scatter_43, slice_scatter_45, slice_scatter_47, slice_scatter_49, slice_scatter_51, slice_scatter_53, slice_scatter_55, slice_scatter_57, slice_scatter_59, slice_scatter_61, slice_scatter_63, slice_scatter_65, slice_scatter_67, slice_scatter_69, slice_scatter_71, slice_scatter_73, slice_scatter_75, slice_scatter_77, slice_scatter_79, slice_scatter_81, slice_scatter_83, slice_scatter_85, slice_scatter_87, slice_scatter_89, slice_scatter_91, slice_scatter_93, slice_scatter_95, slice_scatter_97, slice_scatter_99, slice_scatter_101, slice_scatter_103, slice_scatter_105, slice_scatter_107, slice_scatter_109, slice_scatter_111, slice_scatter_113, slice_scatter_115, slice_scatter_117, slice_scatter_119, slice_scatter_121, slice_scatter_123, slice_scatter_125, slice_scatter_127, view_1089)
INFO:root:Exported graph:
graph():
%arg0_1 : [num_users=1] = placeholder[target=arg0_1]
%arg1_1 : [num_users=1] = placeholder[target=arg1_1]
%arg2_1 : [num_users=1] = placeholder[target=arg2_1]
%arg3_1 : [num_users=1] = placeholder[target=arg3_1]
%arg4_1 : [num_users=1] = placeholder[target=arg4_1]
%arg5_1 : [num_users=1] = placeholder[target=arg5_1]
%arg6_1 : [num_users=1] = placeholder[target=arg6_1]
%arg7_1 : [num_users=1] = placeholder[target=arg7_1]
%arg8_1 : [num_users=1] = placeholder[target=arg8_1]
%arg9_1 : [num_users=1] = placeholder[target=arg9_1]
%arg10_1 : [num_users=1] = placeholder[target=arg10_1]
%arg11_1 : [num_users=1] = placeholder[target=arg11_1]
%arg12_1 : [num_users=1] = placeholder[target=arg12_1]
%arg13_1 : [num_users=1] = placeholder[target=arg13_1]
%arg14_1 : [num_users=1] = placeholder[target=arg14_1]
%arg15_1 : [num_users=1] = placeholder[target=arg15_1]
%arg16_1 : [num_users=1] = placeholder[target=arg16_1]
%arg17_1 : [num_users=1] = placeholder[target=arg17_1]
%arg18_1 : [num_users=1] = placeholder[target=arg18_1]
%arg19_1 : [num_users=1] = placeholder[target=arg19_1]
%arg20_1 : [num_users=1] = placeholder[target=arg20_1]
%arg21_1 : [num_users=1] = placeholder[target=arg21_1]
%arg22_1 : [num_users=1] = placeholder[target=arg22_1]
%arg23_1 : [num_users=1] = placeholder[target=arg23_1]
%arg24_1 : [num_users=1] = placeholder[target=arg24_1]
%arg25_1 : [num_users=1] = placeholder[target=arg25_1]
%arg26_1 : [num_users=1] = placeholder[target=arg26_1]
%arg27_1 : [num_users=1] = placeholder[target=arg27_1]
%arg28_1 : [num_users=1] = placeholder[target=arg28_1]
%arg29_1 : [num_users=1] = placeholder[target=arg29_1]
%arg30_1 : [num_users=1] = placeholder[target=arg30_1]
%arg31_1 : [num_users=1] = placeholder[target=arg31_1]
%arg32_1 : [num_users=1] = placeholder[target=arg32_1]
%arg33_1 : [num_users=1] = placeholder[target=arg33_1]
%arg34_1 : [num_users=1] = placeholder[target=arg34_1]
%arg35_1 : [num_users=1] = placeholder[target=arg35_1]
%arg36_1 : [num_users=1] = placeholder[target=arg36_1]
%arg37_1 : [num_users=1] = placeholder[target=arg37_1]
%arg38_1 : [num_users=1] = placeholder[target=arg38_1]
%arg39_1 : [num_users=1] = placeholder[target=arg39_1]
%arg40_1 : [num_users=1] = placeholder[target=arg40_1]
%arg41_1 : [num_users=1] = placeholder[target=arg41_1]
%arg42_1 : [num_users=1] = placeholder[target=arg42_1]
%arg43_1 : [num_users=1] = placeholder[target=arg43_1]
%arg44_1 : [num_users=1] = placeholder[target=arg44_1]
%arg45_1 : [num_users=1] = placeholder[target=arg45_1]
%arg46_1 : [num_users=1] = placeholder[target=arg46_1]
%arg47_1 : [num_users=1] = placeholder[target=arg47_1]
%arg48_1 : [num_users=1] = placeholder[target=arg48_1]
%arg49_1 : [num_users=1] = placeholder[target=arg49_1]
%arg50_1 : [num_users=1] = placeholder[target=arg50_1]
%arg51_1 : [num_users=1] = placeholder[target=arg51_1]
%arg52_1 : [num_users=1] = placeholder[target=arg52_1]
%arg53_1 : [num_users=1] = placeholder[target=arg53_1]
%arg54_1 : [num_users=1] = placeholder[target=arg54_1]
%arg55_1 : [num_users=1] = placeholder[target=arg55_1]
%arg56_1 : [num_users=1] = placeholder[target=arg56_1]
%arg57_1 : [num_users=1] = placeholder[target=arg57_1]
%arg58_1 : [num_users=1] = placeholder[target=arg58_1]
%arg59_1 : [num_users=1] = placeholder[target=arg59_1]
%arg60_1 : [num_users=1] = placeholder[target=arg60_1]
%arg61_1 : [num_users=1] = placeholder[target=arg61_1]
%arg62_1 : [num_users=1] = placeholder[target=arg62_1]
%arg63_1 : [num_users=1] = placeholder[target=arg63_1]
%arg64_1 : [num_users=1] = placeholder[target=arg64_1]
%arg65_1 : [num_users=1] = placeholder[target=arg65_1]
%arg66_1 : [num_users=1] = placeholder[target=arg66_1]
%arg67_1 : [num_users=1] = placeholder[target=arg67_1]
%arg68_1 : [num_users=1] = placeholder[target=arg68_1]
%arg69_1 : [num_users=1] = placeholder[target=arg69_1]
%arg70_1 : [num_users=1] = placeholder[target=arg70_1]
%arg71_1 : [num_users=1] = placeholder[target=arg71_1]
%arg72_1 : [num_users=1] = placeholder[target=arg72_1]
%arg73_1 : [num_users=1] = placeholder[target=arg73_1]
%arg74_1 : [num_users=1] = placeholder[target=arg74_1]
%arg75_1 : [num_users=1] = placeholder[target=arg75_1]
%arg76_1 : [num_users=1] = placeholder[target=arg76_1]
%arg77_1 : [num_users=3] = placeholder[target=arg77_1]
%arg78_1 : [num_users=3] = placeholder[target=arg78_1]
%arg79_1 : [num_users=1] = placeholder[target=arg79_1]
%arg80_1 : [num_users=1] = placeholder[target=arg80_1]
%arg81_1 : [num_users=1] = placeholder[target=arg81_1]
%arg82_1 : [num_users=1] = placeholder[target=arg82_1]
%arg83_1 : [num_users=1] = placeholder[target=arg83_1]
%arg84_1 : [num_users=1] = placeholder[target=arg84_1]
%arg85_1 : [num_users=1] = placeholder[target=arg85_1]
%arg86_1 : [num_users=1] = placeholder[target=arg86_1]
%arg87_1 : [num_users=1] = placeholder[target=arg87_1]
%arg88_1 : [num_users=1] = placeholder[target=arg88_1]
%arg89_1 : [num_users=1] = placeholder[target=arg89_1]
%arg90_1 : [num_users=1] = placeholder[target=arg90_1]
%arg91_1 : [num_users=1] = placeholder[target=arg91_1]
%arg92_1 : [num_users=1] = placeholder[target=arg92_1]
%arg93_1 : [num_users=1] = placeholder[target=arg93_1]
%arg94_1 : [num_users=1] = placeholder[target=arg94_1]
%arg95_1 : [num_users=1] = placeholder[target=arg95_1]
%arg96_1 : [num_users=1] = placeholder[target=arg96_1]
%arg97_1 : [num_users=1] = placeholder[target=arg97_1]
%arg98_1 : [num_users=1] = placeholder[target=arg98_1]
%arg99_1 : [num_users=1] = placeholder[target=arg99_1]
%arg100_1 : [num_users=1] = placeholder[target=arg100_1]
%arg101_1 : [num_users=3] = placeholder[target=arg101_1]
%arg102_1 : [num_users=3] = placeholder[target=arg102_1]
%arg103_1 : [num_users=1] = placeholder[target=arg103_1]
%arg104_1 : [num_users=1] = placeholder[target=arg104_1]
%arg105_1 : [num_users=1] = placeholder[target=arg105_1]
%arg106_1 : [num_users=1] = placeholder[target=arg106_1]
%arg107_1 : [num_users=1] = placeholder[target=arg107_1]
%arg108_1 : [num_users=1] = placeholder[target=arg108_1]
%arg109_1 : [num_users=1] = placeholder[target=arg109_1]
%arg110_1 : [num_users=1] = placeholder[target=arg110_1]
%arg111_1 : [num_users=1] = placeholder[target=arg111_1]
%arg112_1 : [num_users=1] = placeholder[target=arg112_1]
%arg113_1 : [num_users=1] = placeholder[target=arg113_1]
%arg114_1 : [num_users=1] = placeholder[target=arg114_1]
%arg115_1 : [num_users=1] = placeholder[target=arg115_1]
%arg116_1 : [num_users=1] = placeholder[target=arg116_1]
%arg117_1 : [num_users=1] = placeholder[target=arg117_1]
%arg118_1 : [num_users=1] = placeholder[target=arg118_1]
%arg119_1 : [num_users=1] = placeholder[target=arg119_1]
%arg120_1 : [num_users=1] = placeholder[target=arg120_1]
%arg121_1 : [num_users=1] = placeholder[target=arg121_1]
%arg122_1 : [num_users=1] = placeholder[target=arg122_1]
%arg123_1 : [num_users=1] = placeholder[target=arg123_1]
%arg124_1 : [num_users=1] = placeholder[target=arg124_1]
%arg125_1 : [num_users=3] = placeholder[target=arg125_1]
%arg126_1 : [num_users=3] = placeholder[target=arg126_1]
%arg127_1 : [num_users=1] = placeholder[target=arg127_1]
%arg128_1 : [num_users=1] = placeholder[target=arg128_1]
%arg129_1 : [num_users=1] = placeholder[target=arg129_1]
%arg130_1 : [num_users=1] = placeholder[target=arg130_1]
%arg131_1 : [num_users=1] = placeholder[target=arg131_1]
%arg132_1 : [num_users=1] = placeholder[target=arg132_1]
%arg133_1 : [num_users=1] = placeholder[target=arg133_1]
%arg134_1 : [num_users=1] = placeholder[target=arg134_1]
%arg135_1 : [num_users=1] = placeholder[target=arg135_1]
%arg136_1 : [num_users=1] = placeholder[target=arg136_1]
%arg137_1 : [num_users=1] = placeholder[target=arg137_1]
%arg138_1 : [num_users=1] = placeholder[target=arg138_1]
%arg139_1 : [num_users=1] = placeholder[target=arg139_1]
%arg140_1 : [num_users=1] = placeholder[target=arg140_1]
%arg141_1 : [num_users=1] = placeholder[target=arg141_1]
%arg142_1 : [num_users=1] = placeholder[target=arg142_1]
%arg143_1 : [num_users=1] = placeholder[target=arg143_1]
%arg144_1 : [num_users=1] = placeholder[target=arg144_1]
%arg145_1 : [num_users=1] = placeholder[target=arg145_1]
%arg146_1 : [num_users=1] = placeholder[target=arg146_1]
%arg147_1 : [num_users=1] = placeholder[target=arg147_1]
%arg148_1 : [num_users=1] = placeholder[target=arg148_1]
%arg149_1 : [num_users=3] = placeholder[target=arg149_1]
%arg150_1 : [num_users=3] = placeholder[target=arg150_1]
%arg151_1 : [num_users=1] = placeholder[target=arg151_1]
%arg152_1 : [num_users=1] = placeholder[target=arg152_1]
%arg153_1 : [num_users=1] = placeholder[target=arg153_1]
%arg154_1 : [num_users=1] = placeholder[target=arg154_1]
%arg155_1 : [num_users=1] = placeholder[target=arg155_1]
%arg156_1 : [num_users=1] = placeholder[target=arg156_1]
%arg157_1 : [num_users=1] = placeholder[target=arg157_1]
%arg158_1 : [num_users=1] = placeholder[target=arg158_1]
%arg159_1 : [num_users=1] = placeholder[target=arg159_1]
%arg160_1 : [num_users=1] = placeholder[target=arg160_1]
%arg161_1 : [num_users=1] = placeholder[target=arg161_1]
%arg162_1 : [num_users=1] = placeholder[target=arg162_1]
%arg163_1 : [num_users=1] = placeholder[target=arg163_1]
%arg164_1 : [num_users=1] = placeholder[target=arg164_1]
%arg165_1 : [num_users=1] = placeholder[target=arg165_1]
%arg166_1 : [num_users=1] = placeholder[target=arg166_1]
%arg167_1 : [num_users=1] = placeholder[target=arg167_1]
%arg168_1 : [num_users=1] = placeholder[target=arg168_1]
%arg169_1 : [num_users=1] = placeholder[target=arg169_1]
%arg170_1 : [num_users=1] = placeholder[target=arg170_1]
%arg171_1 : [num_users=1] = placeholder[target=arg171_1]
%arg172_1 : [num_users=1] = placeholder[target=arg172_1]
%arg173_1 : [num_users=3] = placeholder[target=arg173_1]
%arg174_1 : [num_users=3] = placeholder[target=arg174_1]
%arg175_1 : [num_users=1] = placeholder[target=arg175_1]
%arg176_1 : [num_users=1] = placeholder[target=arg176_1]
%arg177_1 : [num_users=1] = placeholder[target=arg177_1]
%arg178_1 : [num_users=1] = placeholder[target=arg178_1]
%arg179_1 : [num_users=1] = placeholder[target=arg179_1]
%arg180_1 : [num_users=1] = placeholder[target=arg180_1]
%arg181_1 : [num_users=1] = placeholder[target=arg181_1]
%arg182_1 : [num_users=1] = placeholder[target=arg182_1]
%arg183_1 : [num_users=1] = placeholder[target=arg183_1]
%arg184_1 : [num_users=1] = placeholder[target=arg184_1]
%arg185_1 : [num_users=1] = placeholder[target=arg185_1]
%arg186_1 : [num_users=1] = placeholder[target=arg186_1]
%arg187_1 : [num_users=1] = placeholder[target=arg187_1]
%arg188_1 : [num_users=1] = placeholder[target=arg188_1]
%arg189_1 : [num_users=1] = placeholder[target=arg189_1]
%arg190_1 : [num_users=1] = placeholder[target=arg190_1]
%arg191_1 : [num_users=1] = placeholder[target=arg191_1]
%arg192_1 : [num_users=1] = placeholder[target=arg192_1]
%arg193_1 : [num_users=1] = placeholder[target=arg193_1]
%arg194_1 : [num_users=1] = placeholder[target=arg194_1]
%arg195_1 : [num_users=1] = placeholder[target=arg195_1]
%arg196_1 : [num_users=1] = placeholder[target=arg196_1]
%arg197_1 : [num_users=3] = placeholder[target=arg197_1]
%arg198_1 : [num_users=3] = placeholder[target=arg198_1]
%arg199_1 : [num_users=1] = placeholder[target=arg199_1]
%arg200_1 : [num_users=1] = placeholder[target=arg200_1]
%arg201_1 : [num_users=1] = placeholder[target=arg201_1]
%arg202_1 : [num_users=1] = placeholder[target=arg202_1]
%arg203_1 : [num_users=1] = placeholder[target=arg203_1]
%arg204_1 : [num_users=1] = placeholder[target=arg204_1]
%arg205_1 : [num_users=1] = placeholder[target=arg205_1]
%arg206_1 : [num_users=1] = placeholder[target=arg206_1]
%arg207_1 : [num_users=1] = placeholder[target=arg207_1]
%arg208_1 : [num_users=1] = placeholder[target=arg208_1]
%arg209_1 : [num_users=1] = placeholder[target=arg209_1]
%arg210_1 : [num_users=1] = placeholder[target=arg210_1]
%arg211_1 : [num_users=1] = placeholder[target=arg211_1]
%arg212_1 : [num_users=1] = placeholder[target=arg212_1]
%arg213_1 : [num_users=1] = placeholder[target=arg213_1]
%arg214_1 : [num_users=1] = placeholder[target=arg214_1]
%arg215_1 : [num_users=1] = placeholder[target=arg215_1]
%arg216_1 : [num_users=1] = placeholder[target=arg216_1]
%arg217_1 : [num_users=1] = placeholder[target=arg217_1]
%arg218_1 : [num_users=1] = placeholder[target=arg218_1]
%arg219_1 : [num_users=1] = placeholder[target=arg219_1]
%arg220_1 : [num_users=1] = placeholder[target=arg220_1]
%arg221_1 : [num_users=3] = placeholder[target=arg221_1]
%arg222_1 : [num_users=3] = placeholder[target=arg222_1]
%arg223_1 : [num_users=1] = placeholder[target=arg223_1]
%arg224_1 : [num_users=1] = placeholder[target=arg224_1]
%arg225_1 : [num_users=1] = placeholder[target=arg225_1]
%arg226_1 : [num_users=1] = placeholder[target=arg226_1]
%arg227_1 : [num_users=1] = placeholder[target=arg227_1]
%arg228_1 : [num_users=1] = placeholder[target=arg228_1]
%arg229_1 : [num_users=1] = placeholder[target=arg229_1]
%arg230_1 : [num_users=1] = placeholder[target=arg230_1]
%arg231_1 : [num_users=1] = placeholder[target=arg231_1]
%arg232_1 : [num_users=1] = placeholder[target=arg232_1]
%arg233_1 : [num_users=1] = placeholder[target=arg233_1]
%arg234_1 : [num_users=1] = placeholder[target=arg234_1]
%arg235_1 : [num_users=1] = placeholder[target=arg235_1]
%arg236_1 : [num_users=1] = placeholder[target=arg236_1]
%arg237_1 : [num_users=1] = placeholder[target=arg237_1]
%arg238_1 : [num_users=1] = placeholder[target=arg238_1]
%arg239_1 : [num_users=1] = placeholder[target=arg239_1]
%arg240_1 : [num_users=1] = placeholder[target=arg240_1]
%arg241_1 : [num_users=1] = placeholder[target=arg241_1]
%arg242_1 : [num_users=1] = placeholder[target=arg242_1]
%arg243_1 : [num_users=1] = placeholder[target=arg243_1]
%arg244_1 : [num_users=1] = placeholder[target=arg244_1]
%arg245_1 : [num_users=3] = placeholder[target=arg245_1]
%arg246_1 : [num_users=3] = placeholder[target=arg246_1]
%arg247_1 : [num_users=1] = placeholder[target=arg247_1]
%arg248_1 : [num_users=1] = placeholder[target=arg248_1]
%arg249_1 : [num_users=1] = placeholder[target=arg249_1]
%arg250_1 : [num_users=1] = placeholder[target=arg250_1]
%arg251_1 : [num_users=1] = placeholder[target=arg251_1]
%arg252_1 : [num_users=1] = placeholder[target=arg252_1]
%arg253_1 : [num_users=1] = placeholder[target=arg253_1]
%arg254_1 : [num_users=1] = placeholder[target=arg254_1]
%arg255_1 : [num_users=1] = placeholder[target=arg255_1]
%arg256_1 : [num_users=1] = placeholder[target=arg256_1]
%arg257_1 : [num_users=1] = placeholder[target=arg257_1]
%arg258_1 : [num_users=1] = placeholder[target=arg258_1]
%arg259_1 : [num_users=1] = placeholder[target=arg259_1]
%arg260_1 : [num_users=1] = placeholder[target=arg260_1]
%arg261_1 : [num_users=1] = placeholder[target=arg261_1]
%arg262_1 : [num_users=1] = placeholder[target=arg262_1]
%arg263_1 : [num_users=1] = placeholder[target=arg263_1]
%arg264_1 : [num_users=1] = placeholder[target=arg264_1]
%arg265_1 : [num_users=1] = placeholder[target=arg265_1]
%arg266_1 : [num_users=1] = placeholder[target=arg266_1]
%arg267_1 : [num_users=1] = placeholder[target=arg267_1]
%arg268_1 : [num_users=1] = placeholder[target=arg268_1]
%arg269_1 : [num_users=3] = placeholder[target=arg269_1]
%arg270_1 : [num_users=3] = placeholder[target=arg270_1]
%arg271_1 : [num_users=1] = placeholder[target=arg271_1]
%arg272_1 : [num_users=1] = placeholder[target=arg272_1]
%arg273_1 : [num_users=1] = placeholder[target=arg273_1]
%arg274_1 : [num_users=1] = placeholder[target=arg274_1]
%arg275_1 : [num_users=1] = placeholder[target=arg275_1]
%arg276_1 : [num_users=1] = placeholder[target=arg276_1]
%arg277_1 : [num_users=1] = placeholder[target=arg277_1]
%arg278_1 : [num_users=1] = placeholder[target=arg278_1]
%arg279_1 : [num_users=1] = placeholder[target=arg279_1]
%arg280_1 : [num_users=1] = placeholder[target=arg280_1]
%arg281_1 : [num_users=1] = placeholder[target=arg281_1]
%arg282_1 : [num_users=1] = placeholder[target=arg282_1]
%arg283_1 : [num_users=1] = placeholder[target=arg283_1]
%arg284_1 : [num_users=1] = placeholder[target=arg284_1]
%arg285_1 : [num_users=1] = placeholder[target=arg285_1]
%arg286_1 : [num_users=1] = placeholder[target=arg286_1]
%arg287_1 : [num_users=1] = placeholder[target=arg287_1]
%arg288_1 : [num_users=1] = placeholder[target=arg288_1]
%arg289_1 : [num_users=1] = placeholder[target=arg289_1]
%arg290_1 : [num_users=1] = placeholder[target=arg290_1]
%arg291_1 : [num_users=1] = placeholder[target=arg291_1]
%arg292_1 : [num_users=1] = placeholder[target=arg292_1]
%arg293_1 : [num_users=3] = placeholder[target=arg293_1]
%arg294_1 : [num_users=3] = placeholder[target=arg294_1]
%arg295_1 : [num_users=1] = placeholder[target=arg295_1]
%arg296_1 : [num_users=1] = placeholder[target=arg296_1]
%arg297_1 : [num_users=1] = placeholder[target=arg297_1]
%arg298_1 : [num_users=1] = placeholder[target=arg298_1]
%arg299_1 : [num_users=1] = placeholder[target=arg299_1]
%arg300_1 : [num_users=1] = placeholder[target=arg300_1]
%arg301_1 : [num_users=1] = placeholder[target=arg301_1]
%arg302_1 : [num_users=1] = placeholder[target=arg302_1]
%arg303_1 : [num_users=1] = placeholder[target=arg303_1]
%arg304_1 : [num_users=1] = placeholder[target=arg304_1]
%arg305_1 : [num_users=1] = placeholder[target=arg305_1]
%arg306_1 : [num_users=1] = placeholder[target=arg306_1]
%arg307_1 : [num_users=1] = placeholder[target=arg307_1]
%arg308_1 : [num_users=1] = placeholder[target=arg308_1]
%arg309_1 : [num_users=1] = placeholder[target=arg309_1]
%arg310_1 : [num_users=1] = placeholder[target=arg310_1]
%arg311_1 : [num_users=1] = placeholder[target=arg311_1]
%arg312_1 : [num_users=1] = placeholder[target=arg312_1]
%arg313_1 : [num_users=1] = placeholder[target=arg313_1]
%arg314_1 : [num_users=1] = placeholder[target=arg314_1]
%arg315_1 : [num_users=1] = placeholder[target=arg315_1]
%arg316_1 : [num_users=1] = placeholder[target=arg316_1]
%arg317_1 : [num_users=3] = placeholder[target=arg317_1]
%arg318_1 : [num_users=3] = placeholder[target=arg318_1]
%arg319_1 : [num_users=1] = placeholder[target=arg319_1]
%arg320_1 : [num_users=1] = placeholder[target=arg320_1]
%arg321_1 : [num_users=1] = placeholder[target=arg321_1]
%arg322_1 : [num_users=1] = placeholder[target=arg322_1]
%arg323_1 : [num_users=1] = placeholder[target=arg323_1]
%arg324_1 : [num_users=1] = placeholder[target=arg324_1]
%arg325_1 : [num_users=1] = placeholder[target=arg325_1]
%arg326_1 : [num_users=1] = placeholder[target=arg326_1]
%arg327_1 : [num_users=1] = placeholder[target=arg327_1]
%arg328_1 : [num_users=1] = placeholder[target=arg328_1]
%arg329_1 : [num_users=1] = placeholder[target=arg329_1]
%arg330_1 : [num_users=1] = placeholder[target=arg330_1]
%arg331_1 : [num_users=1] = placeholder[target=arg331_1]
%arg332_1 : [num_users=1] = placeholder[target=arg332_1]
%arg333_1 : [num_users=1] = placeholder[target=arg333_1]
%arg334_1 : [num_users=1] = placeholder[target=arg334_1]
%arg335_1 : [num_users=1] = placeholder[target=arg335_1]
%arg336_1 : [num_users=1] = placeholder[target=arg336_1]
%arg337_1 : [num_users=1] = placeholder[target=arg337_1]
%arg338_1 : [num_users=1] = placeholder[target=arg338_1]
%arg339_1 : [num_users=1] = placeholder[target=arg339_1]
%arg340_1 : [num_users=1] = placeholder[target=arg340_1]
%arg341_1 : [num_users=3] = placeholder[target=arg341_1]
%arg342_1 : [num_users=3] = placeholder[target=arg342_1]
%arg343_1 : [num_users=1] = placeholder[target=arg343_1]
%arg344_1 : [num_users=1] = placeholder[target=arg344_1]
%arg345_1 : [num_users=1] = placeholder[target=arg345_1]
%arg346_1 : [num_users=1] = placeholder[target=arg346_1]
%arg347_1 : [num_users=1] = placeholder[target=arg347_1]
%arg348_1 : [num_users=1] = placeholder[target=arg348_1]
%arg349_1 : [num_users=1] = placeholder[target=arg349_1]
%arg350_1 : [num_users=1] = placeholder[target=arg350_1]
%arg351_1 : [num_users=1] = placeholder[target=arg351_1]
%arg352_1 : [num_users=1] = placeholder[target=arg352_1]
%arg353_1 : [num_users=1] = placeholder[target=arg353_1]
%arg354_1 : [num_users=1] = placeholder[target=arg354_1]
%arg355_1 : [num_users=1] = placeholder[target=arg355_1]
%arg356_1 : [num_users=1] = placeholder[target=arg356_1]
%arg357_1 : [num_users=1] = placeholder[target=arg357_1]
%arg358_1 : [num_users=1] = placeholder[target=arg358_1]
%arg359_1 : [num_users=1] = placeholder[target=arg359_1]
%arg360_1 : [num_users=1] = placeholder[target=arg360_1]
%arg361_1 : [num_users=1] = placeholder[target=arg361_1]
%arg362_1 : [num_users=1] = placeholder[target=arg362_1]
%arg363_1 : [num_users=1] = placeholder[target=arg363_1]
%arg364_1 : [num_users=1] = placeholder[target=arg364_1]
%arg365_1 : [num_users=3] = placeholder[target=arg365_1]
%arg366_1 : [num_users=3] = placeholder[target=arg366_1]
%arg367_1 : [num_users=1] = placeholder[target=arg367_1]
%arg368_1 : [num_users=1] = placeholder[target=arg368_1]
%arg369_1 : [num_users=1] = placeholder[target=arg369_1]
%arg370_1 : [num_users=1] = placeholder[target=arg370_1]
%arg371_1 : [num_users=1] = placeholder[target=arg371_1]
%arg372_1 : [num_users=1] = placeholder[target=arg372_1]
%arg373_1 : [num_users=1] = placeholder[target=arg373_1]
%arg374_1 : [num_users=1] = placeholder[target=arg374_1]
%arg375_1 : [num_users=1] = placeholder[target=arg375_1]
%arg376_1 : [num_users=1] = placeholder[target=arg376_1]
%arg377_1 : [num_users=1] = placeholder[target=arg377_1]
%arg378_1 : [num_users=1] = placeholder[target=arg378_1]
%arg379_1 : [num_users=1] = placeholder[target=arg379_1]
%arg380_1 : [num_users=1] = placeholder[target=arg380_1]
%arg381_1 : [num_users=1] = placeholder[target=arg381_1]
%arg382_1 : [num_users=1] = placeholder[target=arg382_1]
%arg383_1 : [num_users=1] = placeholder[target=arg383_1]
%arg384_1 : [num_users=1] = placeholder[target=arg384_1]
%arg385_1 : [num_users=1] = placeholder[target=arg385_1]
%arg386_1 : [num_users=1] = placeholder[target=arg386_1]
%arg387_1 : [num_users=1] = placeholder[target=arg387_1]
%arg388_1 : [num_users=1] = placeholder[target=arg388_1]
%arg389_1 : [num_users=3] = placeholder[target=arg389_1]
%arg390_1 : [num_users=3] = placeholder[target=arg390_1]
%arg391_1 : [num_users=1] = placeholder[target=arg391_1]
%arg392_1 : [num_users=1] = placeholder[target=arg392_1]
%arg393_1 : [num_users=1] = placeholder[target=arg393_1]
%arg394_1 : [num_users=1] = placeholder[target=arg394_1]
%arg395_1 : [num_users=1] = placeholder[target=arg395_1]
%arg396_1 : [num_users=1] = placeholder[target=arg396_1]
%arg397_1 : [num_users=1] = placeholder[target=arg397_1]
%arg398_1 : [num_users=1] = placeholder[target=arg398_1]
%arg399_1 : [num_users=1] = placeholder[target=arg399_1]
%arg400_1 : [num_users=1] = placeholder[target=arg400_1]
%arg401_1 : [num_users=1] = placeholder[target=arg401_1]
%arg402_1 : [num_users=1] = placeholder[target=arg402_1]
%arg403_1 : [num_users=1] = placeholder[target=arg403_1]
%arg404_1 : [num_users=1] = placeholder[target=arg404_1]
%arg405_1 : [num_users=1] = placeholder[target=arg405_1]
%arg406_1 : [num_users=1] = placeholder[target=arg406_1]
%arg407_1 : [num_users=1] = placeholder[target=arg407_1]
%arg408_1 : [num_users=1] = placeholder[target=arg408_1]
%arg409_1 : [num_users=1] = placeholder[target=arg409_1]
%arg410_1 : [num_users=1] = placeholder[target=arg410_1]
%arg411_1 : [num_users=1] = placeholder[target=arg411_1]
%arg412_1 : [num_users=1] = placeholder[target=arg412_1]
%arg413_1 : [num_users=3] = placeholder[target=arg413_1]
%arg414_1 : [num_users=3] = placeholder[target=arg414_1]
%arg415_1 : [num_users=1] = placeholder[target=arg415_1]
%arg416_1 : [num_users=1] = placeholder[target=arg416_1]
%arg417_1 : [num_users=1] = placeholder[target=arg417_1]
%arg418_1 : [num_users=1] = placeholder[target=arg418_1]
%arg419_1 : [num_users=1] = placeholder[target=arg419_1]
%arg420_1 : [num_users=1] = placeholder[target=arg420_1]
%arg421_1 : [num_users=1] = placeholder[target=arg421_1]
%arg422_1 : [num_users=1] = placeholder[target=arg422_1]
%arg423_1 : [num_users=1] = placeholder[target=arg423_1]
%arg424_1 : [num_users=1] = placeholder[target=arg424_1]
%arg425_1 : [num_users=1] = placeholder[target=arg425_1]
%arg426_1 : [num_users=1] = placeholder[target=arg426_1]
%arg427_1 : [num_users=1] = placeholder[target=arg427_1]
%arg428_1 : [num_users=1] = placeholder[target=arg428_1]
%arg429_1 : [num_users=1] = placeholder[target=arg429_1]
%arg430_1 : [num_users=1] = placeholder[target=arg430_1]
%arg431_1 : [num_users=1] = placeholder[target=arg431_1]
%arg432_1 : [num_users=1] = placeholder[target=arg432_1]
%arg433_1 : [num_users=1] = placeholder[target=arg433_1]
%arg434_1 : [num_users=1] = placeholder[target=arg434_1]
%arg435_1 : [num_users=1] = placeholder[target=arg435_1]
%arg436_1 : [num_users=1] = placeholder[target=arg436_1]
%arg437_1 : [num_users=3] = placeholder[target=arg437_1]
%arg438_1 : [num_users=3] = placeholder[target=arg438_1]
%arg439_1 : [num_users=1] = placeholder[target=arg439_1]
%arg440_1 : [num_users=1] = placeholder[target=arg440_1]
%arg441_1 : [num_users=1] = placeholder[target=arg441_1]
%arg442_1 : [num_users=1] = placeholder[target=arg442_1]
%arg443_1 : [num_users=1] = placeholder[target=arg443_1]
%arg444_1 : [num_users=1] = placeholder[target=arg444_1]
%arg445_1 : [num_users=1] = placeholder[target=arg445_1]
%arg446_1 : [num_users=1] = placeholder[target=arg446_1]
%arg447_1 : [num_users=1] = placeholder[target=arg447_1]
%arg448_1 : [num_users=1] = placeholder[target=arg448_1]
%arg449_1 : [num_users=1] = placeholder[target=arg449_1]
%arg450_1 : [num_users=1] = placeholder[target=arg450_1]
%arg451_1 : [num_users=1] = placeholder[target=arg451_1]
%arg452_1 : [num_users=1] = placeholder[target=arg452_1]
%arg453_1 : [num_users=1] = placeholder[target=arg453_1]
%arg454_1 : [num_users=1] = placeholder[target=arg454_1]
%arg455_1 : [num_users=1] = placeholder[target=arg455_1]
%arg456_1 : [num_users=1] = placeholder[target=arg456_1]
%arg457_1 : [num_users=1] = placeholder[target=arg457_1]
%arg458_1 : [num_users=1] = placeholder[target=arg458_1]
%arg459_1 : [num_users=1] = placeholder[target=arg459_1]
%arg460_1 : [num_users=1] = placeholder[target=arg460_1]
%arg461_1 : [num_users=3] = placeholder[target=arg461_1]
%arg462_1 : [num_users=3] = placeholder[target=arg462_1]
%arg463_1 : [num_users=1] = placeholder[target=arg463_1]
%arg464_1 : [num_users=1] = placeholder[target=arg464_1]
%arg465_1 : [num_users=1] = placeholder[target=arg465_1]
%arg466_1 : [num_users=1] = placeholder[target=arg466_1]
%arg467_1 : [num_users=1] = placeholder[target=arg467_1]
%arg468_1 : [num_users=1] = placeholder[target=arg468_1]
%arg469_1 : [num_users=1] = placeholder[target=arg469_1]
%arg470_1 : [num_users=1] = placeholder[target=arg470_1]
%arg471_1 : [num_users=1] = placeholder[target=arg471_1]
%arg472_1 : [num_users=1] = placeholder[target=arg472_1]
%arg473_1 : [num_users=1] = placeholder[target=arg473_1]
%arg474_1 : [num_users=1] = placeholder[target=arg474_1]
%arg475_1 : [num_users=1] = placeholder[target=arg475_1]
%arg476_1 : [num_users=1] = placeholder[target=arg476_1]
%arg477_1 : [num_users=1] = placeholder[target=arg477_1]
%arg478_1 : [num_users=1] = placeholder[target=arg478_1]
%arg479_1 : [num_users=1] = placeholder[target=arg479_1]
%arg480_1 : [num_users=1] = placeholder[target=arg480_1]
%arg481_1 : [num_users=1] = placeholder[target=arg481_1]
%arg482_1 : [num_users=1] = placeholder[target=arg482_1]
%arg483_1 : [num_users=1] = placeholder[target=arg483_1]
%arg484_1 : [num_users=1] = placeholder[target=arg484_1]
%arg485_1 : [num_users=3] = placeholder[target=arg485_1]
%arg486_1 : [num_users=3] = placeholder[target=arg486_1]
%arg487_1 : [num_users=1] = placeholder[target=arg487_1]
%arg488_1 : [num_users=1] = placeholder[target=arg488_1]
%arg489_1 : [num_users=1] = placeholder[target=arg489_1]
%arg490_1 : [num_users=1] = placeholder[target=arg490_1]
%arg491_1 : [num_users=1] = placeholder[target=arg491_1]
%arg492_1 : [num_users=1] = placeholder[target=arg492_1]
%arg493_1 : [num_users=1] = placeholder[target=arg493_1]
%arg494_1 : [num_users=1] = placeholder[target=arg494_1]
%arg495_1 : [num_users=1] = placeholder[target=arg495_1]
%arg496_1 : [num_users=1] = placeholder[target=arg496_1]
%arg497_1 : [num_users=1] = placeholder[target=arg497_1]
%arg498_1 : [num_users=1] = placeholder[target=arg498_1]
%arg499_1 : [num_users=1] = placeholder[target=arg499_1]
%arg500_1 : [num_users=1] = placeholder[target=arg500_1]
%arg501_1 : [num_users=1] = placeholder[target=arg501_1]
%arg502_1 : [num_users=1] = placeholder[target=arg502_1]
%arg503_1 : [num_users=1] = placeholder[target=arg503_1]
%arg504_1 : [num_users=1] = placeholder[target=arg504_1]
%arg505_1 : [num_users=1] = placeholder[target=arg505_1]
%arg506_1 : [num_users=1] = placeholder[target=arg506_1]
%arg507_1 : [num_users=1] = placeholder[target=arg507_1]
%arg508_1 : [num_users=1] = placeholder[target=arg508_1]
%arg509_1 : [num_users=3] = placeholder[target=arg509_1]
%arg510_1 : [num_users=3] = placeholder[target=arg510_1]
%arg511_1 : [num_users=1] = placeholder[target=arg511_1]
%arg512_1 : [num_users=1] = placeholder[target=arg512_1]
%arg513_1 : [num_users=1] = placeholder[target=arg513_1]
%arg514_1 : [num_users=1] = placeholder[target=arg514_1]
%arg515_1 : [num_users=1] = placeholder[target=arg515_1]
%arg516_1 : [num_users=1] = placeholder[target=arg516_1]
%arg517_1 : [num_users=1] = placeholder[target=arg517_1]
%arg518_1 : [num_users=1] = placeholder[target=arg518_1]
%arg519_1 : [num_users=1] = placeholder[target=arg519_1]
%arg520_1 : [num_users=1] = placeholder[target=arg520_1]
%arg521_1 : [num_users=1] = placeholder[target=arg521_1]
%arg522_1 : [num_users=1] = placeholder[target=arg522_1]
%arg523_1 : [num_users=1] = placeholder[target=arg523_1]
%arg524_1 : [num_users=1] = placeholder[target=arg524_1]
%arg525_1 : [num_users=1] = placeholder[target=arg525_1]
%arg526_1 : [num_users=1] = placeholder[target=arg526_1]
%arg527_1 : [num_users=1] = placeholder[target=arg527_1]
%arg528_1 : [num_users=1] = placeholder[target=arg528_1]
%arg529_1 : [num_users=1] = placeholder[target=arg529_1]
%arg530_1 : [num_users=1] = placeholder[target=arg530_1]
%arg531_1 : [num_users=1] = placeholder[target=arg531_1]
%arg532_1 : [num_users=1] = placeholder[target=arg532_1]
%arg533_1 : [num_users=3] = placeholder[target=arg533_1]
%arg534_1 : [num_users=3] = placeholder[target=arg534_1]
%arg535_1 : [num_users=1] = placeholder[target=arg535_1]
%arg536_1 : [num_users=1] = placeholder[target=arg536_1]
%arg537_1 : [num_users=1] = placeholder[target=arg537_1]
%arg538_1 : [num_users=1] = placeholder[target=arg538_1]
%arg539_1 : [num_users=1] = placeholder[target=arg539_1]
%arg540_1 : [num_users=1] = placeholder[target=arg540_1]
%arg541_1 : [num_users=1] = placeholder[target=arg541_1]
%arg542_1 : [num_users=1] = placeholder[target=arg542_1]
%arg543_1 : [num_users=1] = placeholder[target=arg543_1]
%arg544_1 : [num_users=1] = placeholder[target=arg544_1]
%arg545_1 : [num_users=1] = placeholder[target=arg545_1]
%arg546_1 : [num_users=1] = placeholder[target=arg546_1]
%arg547_1 : [num_users=1] = placeholder[target=arg547_1]
%arg548_1 : [num_users=1] = placeholder[target=arg548_1]
%arg549_1 : [num_users=1] = placeholder[target=arg549_1]
%arg550_1 : [num_users=1] = placeholder[target=arg550_1]
%arg551_1 : [num_users=1] = placeholder[target=arg551_1]
%arg552_1 : [num_users=1] = placeholder[target=arg552_1]
%arg553_1 : [num_users=1] = placeholder[target=arg553_1]
%arg554_1 : [num_users=1] = placeholder[target=arg554_1]
%arg555_1 : [num_users=1] = placeholder[target=arg555_1]
%arg556_1 : [num_users=1] = placeholder[target=arg556_1]
%arg557_1 : [num_users=3] = placeholder[target=arg557_1]
%arg558_1 : [num_users=3] = placeholder[target=arg558_1]
%arg559_1 : [num_users=1] = placeholder[target=arg559_1]
%arg560_1 : [num_users=1] = placeholder[target=arg560_1]
%arg561_1 : [num_users=1] = placeholder[target=arg561_1]
%arg562_1 : [num_users=1] = placeholder[target=arg562_1]
%arg563_1 : [num_users=1] = placeholder[target=arg563_1]
%arg564_1 : [num_users=1] = placeholder[target=arg564_1]
%arg565_1 : [num_users=1] = placeholder[target=arg565_1]
%arg566_1 : [num_users=1] = placeholder[target=arg566_1]
%arg567_1 : [num_users=1] = placeholder[target=arg567_1]
%arg568_1 : [num_users=1] = placeholder[target=arg568_1]
%arg569_1 : [num_users=1] = placeholder[target=arg569_1]
%arg570_1 : [num_users=1] = placeholder[target=arg570_1]
%arg571_1 : [num_users=1] = placeholder[target=arg571_1]
%arg572_1 : [num_users=1] = placeholder[target=arg572_1]
%arg573_1 : [num_users=1] = placeholder[target=arg573_1]
%arg574_1 : [num_users=1] = placeholder[target=arg574_1]
%arg575_1 : [num_users=1] = placeholder[target=arg575_1]
%arg576_1 : [num_users=1] = placeholder[target=arg576_1]
%arg577_1 : [num_users=1] = placeholder[target=arg577_1]
%arg578_1 : [num_users=1] = placeholder[target=arg578_1]
%arg579_1 : [num_users=1] = placeholder[target=arg579_1]
%arg580_1 : [num_users=1] = placeholder[target=arg580_1]
%arg581_1 : [num_users=3] = placeholder[target=arg581_1]
%arg582_1 : [num_users=3] = placeholder[target=arg582_1]
%arg583_1 : [num_users=1] = placeholder[target=arg583_1]
%arg584_1 : [num_users=1] = placeholder[target=arg584_1]
%arg585_1 : [num_users=1] = placeholder[target=arg585_1]
%arg586_1 : [num_users=1] = placeholder[target=arg586_1]
%arg587_1 : [num_users=1] = placeholder[target=arg587_1]
%arg588_1 : [num_users=1] = placeholder[target=arg588_1]
%arg589_1 : [num_users=1] = placeholder[target=arg589_1]
%arg590_1 : [num_users=1] = placeholder[target=arg590_1]
%arg591_1 : [num_users=1] = placeholder[target=arg591_1]
%arg592_1 : [num_users=1] = placeholder[target=arg592_1]
%arg593_1 : [num_users=1] = placeholder[target=arg593_1]
%arg594_1 : [num_users=1] = placeholder[target=arg594_1]
%arg595_1 : [num_users=1] = placeholder[target=arg595_1]
%arg596_1 : [num_users=1] = placeholder[target=arg596_1]
%arg597_1 : [num_users=1] = placeholder[target=arg597_1]
%arg598_1 : [num_users=1] = placeholder[target=arg598_1]
%arg599_1 : [num_users=1] = placeholder[target=arg599_1]
%arg600_1 : [num_users=1] = placeholder[target=arg600_1]
%arg601_1 : [num_users=1] = placeholder[target=arg601_1]
%arg602_1 : [num_users=1] = placeholder[target=arg602_1]
%arg603_1 : [num_users=1] = placeholder[target=arg603_1]
%arg604_1 : [num_users=1] = placeholder[target=arg604_1]
%arg605_1 : [num_users=3] = placeholder[target=arg605_1]
%arg606_1 : [num_users=3] = placeholder[target=arg606_1]
%arg607_1 : [num_users=1] = placeholder[target=arg607_1]
%arg608_1 : [num_users=1] = placeholder[target=arg608_1]
%arg609_1 : [num_users=1] = placeholder[target=arg609_1]
%arg610_1 : [num_users=1] = placeholder[target=arg610_1]
%arg611_1 : [num_users=1] = placeholder[target=arg611_1]
%arg612_1 : [num_users=1] = placeholder[target=arg612_1]
%arg613_1 : [num_users=1] = placeholder[target=arg613_1]
%arg614_1 : [num_users=1] = placeholder[target=arg614_1]
%arg615_1 : [num_users=1] = placeholder[target=arg615_1]
%arg616_1 : [num_users=1] = placeholder[target=arg616_1]
%arg617_1 : [num_users=1] = placeholder[target=arg617_1]
%arg618_1 : [num_users=1] = placeholder[target=arg618_1]
%arg619_1 : [num_users=1] = placeholder[target=arg619_1]
%arg620_1 : [num_users=1] = placeholder[target=arg620_1]
%arg621_1 : [num_users=1] = placeholder[target=arg621_1]
%arg622_1 : [num_users=1] = placeholder[target=arg622_1]
%arg623_1 : [num_users=1] = placeholder[target=arg623_1]
%arg624_1 : [num_users=1] = placeholder[target=arg624_1]
%arg625_1 : [num_users=1] = placeholder[target=arg625_1]
%arg626_1 : [num_users=1] = placeholder[target=arg626_1]
%arg627_1 : [num_users=1] = placeholder[target=arg627_1]
%arg628_1 : [num_users=1] = placeholder[target=arg628_1]
%arg629_1 : [num_users=3] = placeholder[target=arg629_1]
%arg630_1 : [num_users=3] = placeholder[target=arg630_1]
%arg631_1 : [num_users=1] = placeholder[target=arg631_1]
%arg632_1 : [num_users=1] = placeholder[target=arg632_1]
%arg633_1 : [num_users=1] = placeholder[target=arg633_1]
%arg634_1 : [num_users=1] = placeholder[target=arg634_1]
%arg635_1 : [num_users=1] = placeholder[target=arg635_1]
%arg636_1 : [num_users=1] = placeholder[target=arg636_1]
%arg637_1 : [num_users=1] = placeholder[target=arg637_1]
%arg638_1 : [num_users=1] = placeholder[target=arg638_1]
%arg639_1 : [num_users=1] = placeholder[target=arg639_1]
%arg640_1 : [num_users=1] = placeholder[target=arg640_1]
%arg641_1 : [num_users=1] = placeholder[target=arg641_1]
%arg642_1 : [num_users=1] = placeholder[target=arg642_1]
%arg643_1 : [num_users=1] = placeholder[target=arg643_1]
%arg644_1 : [num_users=1] = placeholder[target=arg644_1]
%arg645_1 : [num_users=1] = placeholder[target=arg645_1]
%arg646_1 : [num_users=1] = placeholder[target=arg646_1]
%arg647_1 : [num_users=1] = placeholder[target=arg647_1]
%arg648_1 : [num_users=1] = placeholder[target=arg648_1]
%arg649_1 : [num_users=1] = placeholder[target=arg649_1]
%arg650_1 : [num_users=1] = placeholder[target=arg650_1]
%arg651_1 : [num_users=1] = placeholder[target=arg651_1]
%arg652_1 : [num_users=1] = placeholder[target=arg652_1]
%arg653_1 : [num_users=3] = placeholder[target=arg653_1]
%arg654_1 : [num_users=3] = placeholder[target=arg654_1]
%arg655_1 : [num_users=1] = placeholder[target=arg655_1]
%arg656_1 : [num_users=1] = placeholder[target=arg656_1]
%arg657_1 : [num_users=1] = placeholder[target=arg657_1]
%arg658_1 : [num_users=1] = placeholder[target=arg658_1]
%arg659_1 : [num_users=1] = placeholder[target=arg659_1]
%arg660_1 : [num_users=1] = placeholder[target=arg660_1]
%arg661_1 : [num_users=1] = placeholder[target=arg661_1]
%arg662_1 : [num_users=1] = placeholder[target=arg662_1]
%arg663_1 : [num_users=1] = placeholder[target=arg663_1]
%arg664_1 : [num_users=1] = placeholder[target=arg664_1]
%arg665_1 : [num_users=1] = placeholder[target=arg665_1]
%arg666_1 : [num_users=1] = placeholder[target=arg666_1]
%arg667_1 : [num_users=1] = placeholder[target=arg667_1]
%arg668_1 : [num_users=1] = placeholder[target=arg668_1]
%arg669_1 : [num_users=1] = placeholder[target=arg669_1]
%arg670_1 : [num_users=1] = placeholder[target=arg670_1]
%arg671_1 : [num_users=1] = placeholder[target=arg671_1]
%arg672_1 : [num_users=1] = placeholder[target=arg672_1]
%arg673_1 : [num_users=1] = placeholder[target=arg673_1]
%arg674_1 : [num_users=1] = placeholder[target=arg674_1]
%arg675_1 : [num_users=1] = placeholder[target=arg675_1]
%arg676_1 : [num_users=1] = placeholder[target=arg676_1]
%arg677_1 : [num_users=3] = placeholder[target=arg677_1]
%arg678_1 : [num_users=3] = placeholder[target=arg678_1]
%arg679_1 : [num_users=1] = placeholder[target=arg679_1]
%arg680_1 : [num_users=1] = placeholder[target=arg680_1]
%arg681_1 : [num_users=1] = placeholder[target=arg681_1]
%arg682_1 : [num_users=1] = placeholder[target=arg682_1]
%arg683_1 : [num_users=1] = placeholder[target=arg683_1]
%arg684_1 : [num_users=1] = placeholder[target=arg684_1]
%arg685_1 : [num_users=1] = placeholder[target=arg685_1]
%arg686_1 : [num_users=1] = placeholder[target=arg686_1]
%arg687_1 : [num_users=1] = placeholder[target=arg687_1]
%arg688_1 : [num_users=1] = placeholder[target=arg688_1]
%arg689_1 : [num_users=1] = placeholder[target=arg689_1]
%arg690_1 : [num_users=1] = placeholder[target=arg690_1]
%arg691_1 : [num_users=1] = placeholder[target=arg691_1]
%arg692_1 : [num_users=1] = placeholder[target=arg692_1]
%arg693_1 : [num_users=1] = placeholder[target=arg693_1]
%arg694_1 : [num_users=1] = placeholder[target=arg694_1]
%arg695_1 : [num_users=1] = placeholder[target=arg695_1]
%arg696_1 : [num_users=1] = placeholder[target=arg696_1]
%arg697_1 : [num_users=1] = placeholder[target=arg697_1]
%arg698_1 : [num_users=1] = placeholder[target=arg698_1]
%arg699_1 : [num_users=1] = placeholder[target=arg699_1]
%arg700_1 : [num_users=1] = placeholder[target=arg700_1]
%arg701_1 : [num_users=3] = placeholder[target=arg701_1]
%arg702_1 : [num_users=3] = placeholder[target=arg702_1]
%arg703_1 : [num_users=1] = placeholder[target=arg703_1]
%arg704_1 : [num_users=1] = placeholder[target=arg704_1]
%arg705_1 : [num_users=1] = placeholder[target=arg705_1]
%arg706_1 : [num_users=1] = placeholder[target=arg706_1]
%arg707_1 : [num_users=1] = placeholder[target=arg707_1]
%arg708_1 : [num_users=1] = placeholder[target=arg708_1]
%arg709_1 : [num_users=1] = placeholder[target=arg709_1]
%arg710_1 : [num_users=1] = placeholder[target=arg710_1]
%arg711_1 : [num_users=1] = placeholder[target=arg711_1]
%arg712_1 : [num_users=1] = placeholder[target=arg712_1]
%arg713_1 : [num_users=1] = placeholder[target=arg713_1]
%arg714_1 : [num_users=1] = placeholder[target=arg714_1]
%arg715_1 : [num_users=1] = placeholder[target=arg715_1]
%arg716_1 : [num_users=1] = placeholder[target=arg716_1]
%arg717_1 : [num_users=1] = placeholder[target=arg717_1]
%arg718_1 : [num_users=1] = placeholder[target=arg718_1]
%arg719_1 : [num_users=1] = placeholder[target=arg719_1]
%arg720_1 : [num_users=1] = placeholder[target=arg720_1]
%arg721_1 : [num_users=1] = placeholder[target=arg721_1]
%arg722_1 : [num_users=1] = placeholder[target=arg722_1]
%arg723_1 : [num_users=1] = placeholder[target=arg723_1]
%arg724_1 : [num_users=1] = placeholder[target=arg724_1]
%arg725_1 : [num_users=3] = placeholder[target=arg725_1]
%arg726_1 : [num_users=3] = placeholder[target=arg726_1]
%arg727_1 : [num_users=1] = placeholder[target=arg727_1]
%arg728_1 : [num_users=1] = placeholder[target=arg728_1]
%arg729_1 : [num_users=1] = placeholder[target=arg729_1]
%arg730_1 : [num_users=1] = placeholder[target=arg730_1]
%arg731_1 : [num_users=1] = placeholder[target=arg731_1]
%arg732_1 : [num_users=1] = placeholder[target=arg732_1]
%arg733_1 : [num_users=1] = placeholder[target=arg733_1]
%arg734_1 : [num_users=1] = placeholder[target=arg734_1]
%arg735_1 : [num_users=1] = placeholder[target=arg735_1]
%arg736_1 : [num_users=1] = placeholder[target=arg736_1]
%arg737_1 : [num_users=1] = placeholder[target=arg737_1]
%arg738_1 : [num_users=1] = placeholder[target=arg738_1]
%arg739_1 : [num_users=1] = placeholder[target=arg739_1]
%arg740_1 : [num_users=1] = placeholder[target=arg740_1]
%arg741_1 : [num_users=1] = placeholder[target=arg741_1]
%arg742_1 : [num_users=1] = placeholder[target=arg742_1]
%arg743_1 : [num_users=1] = placeholder[target=arg743_1]
%arg744_1 : [num_users=1] = placeholder[target=arg744_1]
%arg745_1 : [num_users=1] = placeholder[target=arg745_1]
%arg746_1 : [num_users=1] = placeholder[target=arg746_1]
%arg747_1 : [num_users=1] = placeholder[target=arg747_1]
%arg748_1 : [num_users=1] = placeholder[target=arg748_1]
%arg749_1 : [num_users=3] = placeholder[target=arg749_1]
%arg750_1 : [num_users=3] = placeholder[target=arg750_1]
%arg751_1 : [num_users=1] = placeholder[target=arg751_1]
%arg752_1 : [num_users=1] = placeholder[target=arg752_1]
%arg753_1 : [num_users=1] = placeholder[target=arg753_1]
%arg754_1 : [num_users=1] = placeholder[target=arg754_1]
%arg755_1 : [num_users=1] = placeholder[target=arg755_1]
%arg756_1 : [num_users=1] = placeholder[target=arg756_1]
%arg757_1 : [num_users=1] = placeholder[target=arg757_1]
%arg758_1 : [num_users=1] = placeholder[target=arg758_1]
%arg759_1 : [num_users=1] = placeholder[target=arg759_1]
%arg760_1 : [num_users=1] = placeholder[target=arg760_1]
%arg761_1 : [num_users=1] = placeholder[target=arg761_1]
%arg762_1 : [num_users=1] = placeholder[target=arg762_1]
%arg763_1 : [num_users=1] = placeholder[target=arg763_1]
%arg764_1 : [num_users=1] = placeholder[target=arg764_1]
%arg765_1 : [num_users=1] = placeholder[target=arg765_1]
%arg766_1 : [num_users=1] = placeholder[target=arg766_1]
%arg767_1 : [num_users=1] = placeholder[target=arg767_1]
%arg768_1 : [num_users=1] = placeholder[target=arg768_1]
%arg769_1 : [num_users=1] = placeholder[target=arg769_1]
%arg770_1 : [num_users=1] = placeholder[target=arg770_1]
%arg771_1 : [num_users=1] = placeholder[target=arg771_1]
%arg772_1 : [num_users=1] = placeholder[target=arg772_1]
%arg773_1 : [num_users=3] = placeholder[target=arg773_1]
%arg774_1 : [num_users=3] = placeholder[target=arg774_1]
%arg775_1 : [num_users=1] = placeholder[target=arg775_1]
%arg776_1 : [num_users=1] = placeholder[target=arg776_1]
%arg777_1 : [num_users=1] = placeholder[target=arg777_1]
%arg778_1 : [num_users=1] = placeholder[target=arg778_1]
%arg779_1 : [num_users=1] = placeholder[target=arg779_1]
%arg780_1 : [num_users=1] = placeholder[target=arg780_1]
%arg781_1 : [num_users=1] = placeholder[target=arg781_1]
%arg782_1 : [num_users=1] = placeholder[target=arg782_1]
%arg783_1 : [num_users=1] = placeholder[target=arg783_1]
%arg784_1 : [num_users=1] = placeholder[target=arg784_1]
%arg785_1 : [num_users=1] = placeholder[target=arg785_1]
%arg786_1 : [num_users=1] = placeholder[target=arg786_1]
%arg787_1 : [num_users=1] = placeholder[target=arg787_1]
%arg788_1 : [num_users=1] = placeholder[target=arg788_1]
%arg789_1 : [num_users=1] = placeholder[target=arg789_1]
%arg790_1 : [num_users=1] = placeholder[target=arg790_1]
%arg791_1 : [num_users=1] = placeholder[target=arg791_1]
%arg792_1 : [num_users=1] = placeholder[target=arg792_1]
%arg793_1 : [num_users=1] = placeholder[target=arg793_1]
%arg794_1 : [num_users=1] = placeholder[target=arg794_1]
%arg795_1 : [num_users=1] = placeholder[target=arg795_1]
%arg796_1 : [num_users=1] = placeholder[target=arg796_1]
%arg797_1 : [num_users=3] = placeholder[target=arg797_1]
%arg798_1 : [num_users=3] = placeholder[target=arg798_1]
%arg799_1 : [num_users=1] = placeholder[target=arg799_1]
%arg800_1 : [num_users=1] = placeholder[target=arg800_1]
%arg801_1 : [num_users=1] = placeholder[target=arg801_1]
%arg802_1 : [num_users=1] = placeholder[target=arg802_1]
%arg803_1 : [num_users=1] = placeholder[target=arg803_1]
%arg804_1 : [num_users=1] = placeholder[target=arg804_1]
%arg805_1 : [num_users=1] = placeholder[target=arg805_1]
%arg806_1 : [num_users=1] = placeholder[target=arg806_1]
%arg807_1 : [num_users=1] = placeholder[target=arg807_1]
%arg808_1 : [num_users=1] = placeholder[target=arg808_1]
%arg809_1 : [num_users=1] = placeholder[target=arg809_1]
%arg810_1 : [num_users=1] = placeholder[target=arg810_1]
%arg811_1 : [num_users=1] = placeholder[target=arg811_1]
%arg812_1 : [num_users=1] = placeholder[target=arg812_1]
%arg813_1 : [num_users=1] = placeholder[target=arg813_1]
%arg814_1 : [num_users=1] = placeholder[target=arg814_1]
%arg815_1 : [num_users=1] = placeholder[target=arg815_1]
%arg816_1 : [num_users=1] = placeholder[target=arg816_1]
%arg817_1 : [num_users=1] = placeholder[target=arg817_1]
%arg818_1 : [num_users=1] = placeholder[target=arg818_1]
%arg819_1 : [num_users=1] = placeholder[target=arg819_1]
%arg820_1 : [num_users=1] = placeholder[target=arg820_1]
%arg821_1 : [num_users=3] = placeholder[target=arg821_1]
%arg822_1 : [num_users=3] = placeholder[target=arg822_1]
%arg823_1 : [num_users=1] = placeholder[target=arg823_1]
%arg824_1 : [num_users=1] = placeholder[target=arg824_1]
%arg825_1 : [num_users=1] = placeholder[target=arg825_1]
%arg826_1 : [num_users=1] = placeholder[target=arg826_1]
%arg827_1 : [num_users=1] = placeholder[target=arg827_1]
%arg828_1 : [num_users=1] = placeholder[target=arg828_1]
%arg829_1 : [num_users=1] = placeholder[target=arg829_1]
%arg830_1 : [num_users=1] = placeholder[target=arg830_1]
%arg831_1 : [num_users=1] = placeholder[target=arg831_1]
%arg832_1 : [num_users=1] = placeholder[target=arg832_1]
%arg833_1 : [num_users=1] = placeholder[target=arg833_1]
%arg834_1 : [num_users=1] = placeholder[target=arg834_1]
%arg835_1 : [num_users=1] = placeholder[target=arg835_1]
%arg836_1 : [num_users=1] = placeholder[target=arg836_1]
%arg837_1 : [num_users=1] = placeholder[target=arg837_1]
%arg838_1 : [num_users=1] = placeholder[target=arg838_1]
%_lifted_tensor_constant773 : [num_users=1] = placeholder[target=_lifted_tensor_constant773]
%_lifted_tensor_constant774 : [num_users=1] = placeholder[target=_lifted_tensor_constant774]
%_lifted_tensor_constant775 : [num_users=1] = placeholder[target=_lifted_tensor_constant775]
%_lifted_tensor_constant776 : [num_users=1] = placeholder[target=_lifted_tensor_constant776]
%_lifted_tensor_constant777 : [num_users=1] = placeholder[target=_lifted_tensor_constant777]
%_lifted_tensor_constant778 : [num_users=1] = placeholder[target=_lifted_tensor_constant778]
%_lifted_tensor_constant779 : [num_users=1] = placeholder[target=_lifted_tensor_constant779]
%_lifted_tensor_constant780 : [num_users=1] = placeholder[target=_lifted_tensor_constant780]
%_lifted_tensor_constant781 : [num_users=1] = placeholder[target=_lifted_tensor_constant781]
%_lifted_tensor_constant782 : [num_users=1] = placeholder[target=_lifted_tensor_constant782]
%_lifted_tensor_constant783 : [num_users=1] = placeholder[target=_lifted_tensor_constant783]
%_lifted_tensor_constant784 : [num_users=1] = placeholder[target=_lifted_tensor_constant784]
%_lifted_tensor_constant785 : [num_users=1] = placeholder[target=_lifted_tensor_constant785]
%_lifted_tensor_constant786 : [num_users=1] = placeholder[target=_lifted_tensor_constant786]
%_lifted_tensor_constant787 : [num_users=1] = placeholder[target=_lifted_tensor_constant787]
%_lifted_tensor_constant788 : [num_users=1] = placeholder[target=_lifted_tensor_constant788]
%_lifted_tensor_constant789 : [num_users=1] = placeholder[target=_lifted_tensor_constant789]
%_lifted_tensor_constant790 : [num_users=1] = placeholder[target=_lifted_tensor_constant790]
%_lifted_tensor_constant791 : [num_users=1] = placeholder[target=_lifted_tensor_constant791]
%_lifted_tensor_constant792 : [num_users=1] = placeholder[target=_lifted_tensor_constant792]
%_lifted_tensor_constant793 : [num_users=1] = placeholder[target=_lifted_tensor_constant793]
%_lifted_tensor_constant794 : [num_users=1] = placeholder[target=_lifted_tensor_constant794]
%_lifted_tensor_constant795 : [num_users=1] = placeholder[target=_lifted_tensor_constant795]
%_lifted_tensor_constant796 : [num_users=1] = placeholder[target=_lifted_tensor_constant796]
%_lifted_tensor_constant797 : [num_users=1] = placeholder[target=_lifted_tensor_constant797]
%_lifted_tensor_constant798 : [num_users=1] = placeholder[target=_lifted_tensor_constant798]
%_lifted_tensor_constant799 : [num_users=1] = placeholder[target=_lifted_tensor_constant799]
%_lifted_tensor_constant800 : [num_users=1] = placeholder[target=_lifted_tensor_constant800]
%_lifted_tensor_constant801 : [num_users=1] = placeholder[target=_lifted_tensor_constant801]
%_lifted_tensor_constant802 : [num_users=1] = placeholder[target=_lifted_tensor_constant802]
%_lifted_tensor_constant803 : [num_users=1] = placeholder[target=_lifted_tensor_constant803]
%_lifted_tensor_constant804 : [num_users=1] = placeholder[target=_lifted_tensor_constant804]
%_lifted_tensor_constant805 : [num_users=1] = placeholder[target=_lifted_tensor_constant805]
%_lifted_tensor_constant806 : [num_users=1] = placeholder[target=_lifted_tensor_constant806]
%_lifted_tensor_constant807 : [num_users=1] = placeholder[target=_lifted_tensor_constant807]
%_lifted_tensor_constant808 : [num_users=1] = placeholder[target=_lifted_tensor_constant808]
%_lifted_tensor_constant809 : [num_users=1] = placeholder[target=_lifted_tensor_constant809]
%_lifted_tensor_constant810 : [num_users=1] = placeholder[target=_lifted_tensor_constant810]
%_lifted_tensor_constant811 : [num_users=1] = placeholder[target=_lifted_tensor_constant811]
%_lifted_tensor_constant812 : [num_users=1] = placeholder[target=_lifted_tensor_constant812]
%_lifted_tensor_constant813 : [num_users=1] = placeholder[target=_lifted_tensor_constant813]
%_lifted_tensor_constant814 : [num_users=1] = placeholder[target=_lifted_tensor_constant814]
%_lifted_tensor_constant815 : [num_users=1] = placeholder[target=_lifted_tensor_constant815]
%_lifted_tensor_constant816 : [num_users=1] = placeholder[target=_lifted_tensor_constant816]
%_lifted_tensor_constant817 : [num_users=1] = placeholder[target=_lifted_tensor_constant817]
%_lifted_tensor_constant818 : [num_users=1] = placeholder[target=_lifted_tensor_constant818]
%_lifted_tensor_constant819 : [num_users=1] = placeholder[target=_lifted_tensor_constant819]
%_lifted_tensor_constant820 : [num_users=1] = placeholder[target=_lifted_tensor_constant820]
%_lifted_tensor_constant821 : [num_users=1] = placeholder[target=_lifted_tensor_constant821]
%_lifted_tensor_constant822 : [num_users=1] = placeholder[target=_lifted_tensor_constant822]
%_lifted_tensor_constant823 : [num_users=1] = placeholder[target=_lifted_tensor_constant823]
%_lifted_tensor_constant824 : [num_users=1] = placeholder[target=_lifted_tensor_constant824]
%_lifted_tensor_constant825 : [num_users=1] = placeholder[target=_lifted_tensor_constant825]
%_lifted_tensor_constant826 : [num_users=1] = placeholder[target=_lifted_tensor_constant826]
%_lifted_tensor_constant827 : [num_users=1] = placeholder[target=_lifted_tensor_constant827]
%_lifted_tensor_constant828 : [num_users=1] = placeholder[target=_lifted_tensor_constant828]
%_lifted_tensor_constant829 : [num_users=1] = placeholder[target=_lifted_tensor_constant829]
%_lifted_tensor_constant830 : [num_users=1] = placeholder[target=_lifted_tensor_constant830]
%_lifted_tensor_constant831 : [num_users=1] = placeholder[target=_lifted_tensor_constant831]
%_lifted_tensor_constant832 : [num_users=1] = placeholder[target=_lifted_tensor_constant832]
%_lifted_tensor_constant833 : [num_users=1] = placeholder[target=_lifted_tensor_constant833]
%_lifted_tensor_constant834 : [num_users=1] = placeholder[target=_lifted_tensor_constant834]
%_lifted_tensor_constant835 : [num_users=1] = placeholder[target=_lifted_tensor_constant835]
%_lifted_tensor_constant836 : [num_users=1] = placeholder[target=_lifted_tensor_constant836]
%_lifted_tensor_constant837 : [num_users=1] = placeholder[target=_lifted_tensor_constant837]
%arg839_1 : [num_users=1] = placeholder[target=arg839_1]
%arg840_1 : [num_users=98] = placeholder[target=arg840_1]
%aten_embedding_default : [num_users=3] = call_function[target=executorch.exir.dialects.edge._ops.aten.embedding.default](args = (%arg0_1, %arg839_1), kwargs = {})
%aten_index_tensor : [num_users=32] = call_function[target=executorch.exir.dialects.edge._ops.aten.index.Tensor](args = (%arg66_1, [%arg840_1]), kwargs = {})
%aten_index_tensor_1 : [num_users=32] = call_function[target=executorch.exir.dialects.edge._ops.aten.index.Tensor](args = (%arg67_1, [%arg840_1]), kwargs = {})
%aten_mul_tensor : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mul.Tensor](args = (%aten_embedding_default, %aten_embedding_default), kwargs = {})
%aten_mean_dim : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mean.dim](args = (%aten_mul_tensor, [-1], True), kwargs = {})
%aten_add_tensor : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%aten_mean_dim, %_lifted_tensor_constant773), kwargs = {})
%aten_rsqrt_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.rsqrt.default](args = (%aten_add_tensor,), kwargs = {})
%aten_mul_tensor_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mul.Tensor](args = (%aten_embedding_default, %aten_rsqrt_default), kwargs = {})
%aten_mul_tensor_2 : [num_users=6] = call_function[target=executorch.exir.dialects.edge._ops.aten.mul.Tensor](args = (%aten_mul_tensor_1, %arg1_1), kwargs = {})
%quantized_decomposed_choose_qparams_per_token_asymmetric_default : [num_users=2] = call_function[target=executorch.exir.dialects.edge._ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%aten_mul_tensor_2, torch.int8), kwargs = {})
%getitem : [num_users=2] = call_function[target=operator.getitem](args = (%quantized_decomposed_choose_qparams_per_token_asymmetric_default, 0), kwargs = {})
%getitem_1 : [num_users=2] = call_function[target=operator.getitem](args = (%quantized_decomposed_choose_qparams_per_token_asymmetric_default, 1), kwargs = {})
%quantized_decomposed_quantize_per_token_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.quantized_decomposed.quantize_per_token.default](args = (%aten_mul_tensor_2, %getitem, %getitem_1, -128, 127, torch.int8), kwargs = {})
%quantized_decomposed_dequantize_per_token_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.quantized_decomposed.dequantize_per_token.default](args = (%quantized_decomposed_quantize_per_token_default, %getitem, %getitem_1, -128, 127, torch.int8, torch.float32), kwargs = {})
%quantized_decomposed_dequantize_per_channel_group_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg68_1, %arg69_1, %arg70_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%aten_permute_copy_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.permute_copy.default](args = (%quantized_decomposed_dequantize_per_channel_group_default, [1, 0]), kwargs = {})
%aten_view_copy_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%quantized_decomposed_dequantize_per_token_default, [1, 4096]), kwargs = {})
%aten_mm_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mm.default](args = (%aten_view_copy_default, %aten_permute_copy_default), kwargs = {})
%aten_view_copy_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_mm_default, [1, 1, 4096]), kwargs = {})
%quantized_decomposed_choose_qparams_per_token_asymmetric_default_1 : [num_users=2] = call_function[target=executorch.exir.dialects.edge._ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%aten_mul_tensor_2, torch.int8), kwargs = {})
%getitem_2 : [num_users=2] = call_function[target=operator.getitem](args = (%quantized_decomposed_choose_qparams_per_token_asymmetric_default_1, 0), kwargs = {})
%getitem_3 : [num_users=2] = call_function[target=operator.getitem](args = (%quantized_decomposed_choose_qparams_per_token_asymmetric_default_1, 1), kwargs = {})
%quantized_decomposed_quantize_per_token_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.quantized_decomposed.quantize_per_token.default](args = (%aten_mul_tensor_2, %getitem_2, %getitem_3, -128, 127, torch.int8), kwargs = {})
%quantized_decomposed_dequantize_per_token_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.quantized_decomposed.dequantize_per_token.default](args = (%quantized_decomposed_quantize_per_token_default_1, %getitem_2, %getitem_3, -128, 127, torch.int8, torch.float32), kwargs = {})
%quantized_decomposed_dequantize_per_channel_group_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg71_1, %arg72_1, %arg73_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%aten_permute_copy_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.permute_copy.default](args = (%quantized_decomposed_dequantize_per_channel_group_default_1, [1, 0]), kwargs = {})
%aten_view_copy_default_2 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%quantized_decomposed_dequantize_per_token_default_1, [1, 4096]), kwargs = {})
%aten_mm_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mm.default](args = (%aten_view_copy_default_2, %aten_permute_copy_default_1), kwargs = {})
%aten_view_copy_default_3 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_mm_default_1, [1, 1, 4096]), kwargs = {})
%quantized_decomposed_choose_qparams_per_token_asymmetric_default_2 : [num_users=2] = call_function[target=executorch.exir.dialects.edge._ops.quantized_decomposed.choose_qparams_per_token_asymmetric.default](args = (%aten_mul_tensor_2, torch.int8), kwargs = {})
%getitem_4 : [num_users=2] = call_function[target=operator.getitem](args = (%quantized_decomposed_choose_qparams_per_token_asymmetric_default_2, 0), kwargs = {})
%getitem_5 : [num_users=2] = call_function[target=operator.getitem](args = (%quantized_decomposed_choose_qparams_per_token_asymmetric_default_2, 1), kwargs = {})
%quantized_decomposed_quantize_per_token_default_2 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.quantized_decomposed.quantize_per_token.default](args = (%aten_mul_tensor_2, %getitem_4, %getitem_5, -128, 127, torch.int8), kwargs = {})
%quantized_decomposed_dequantize_per_token_default_2 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.quantized_decomposed.dequantize_per_token.default](args = (%quantized_decomposed_quantize_per_token_default_2, %getitem_4, %getitem_5, -128, 127, torch.int8, torch.float32), kwargs = {})
%quantized_decomposed_dequantize_per_channel_group_default_2 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.quantized_decomposed.dequantize_per_channel_group.default](args = (%arg74_1, %arg75_1, %arg76_1, -8, 7, torch.int8, 128, torch.float32), kwargs = {})
%aten_permute_copy_default_2 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.permute_copy.default](args = (%quantized_decomposed_dequantize_per_channel_group_default_2, [1, 0]), kwargs = {})
%aten_view_copy_default_4 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%quantized_decomposed_dequantize_per_token_default_2, [1, 4096]), kwargs = {})
%aten_mm_default_2 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mm.default](args = (%aten_view_copy_default_4, %aten_permute_copy_default_2), kwargs = {})
%aten_view_copy_default_5 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_mm_default_2, [1, 1, 4096]), kwargs = {})
%aten_view_copy_default_6 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_view_copy_default_1, [1, 1, 32, 128]), kwargs = {})
%aten_view_copy_default_7 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_view_copy_default_3, [1, 1, 32, 128]), kwargs = {})
%aten_view_copy_default_8 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_view_copy_default_5, [1, 1, 32, 128]), kwargs = {})
%aten_view_copy_default_9 : [num_users=2] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_view_copy_default_6, [1, 1, 32, -1, 2]), kwargs = {})
%aten_slice_copy_tensor : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.slice_copy.Tensor](args = (%aten_view_copy_default_9, 4, 0, 1), kwargs = {})
%aten_slice_copy_tensor_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.slice_copy.Tensor](args = (%aten_view_copy_default_9, 4, 1, 2), kwargs = {})
%aten_squeeze_copy_dims : [num_users=2] = call_function[target=executorch.exir.dialects.edge._ops.aten.squeeze_copy.dims](args = (%aten_slice_copy_tensor, [4]), kwargs = {})
%aten_squeeze_copy_dims_1 : [num_users=2] = call_function[target=executorch.exir.dialects.edge._ops.aten.squeeze_copy.dims](args = (%aten_slice_copy_tensor_1, [4]), kwargs = {})
%aten_view_copy_default_10 : [num_users=2] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_view_copy_default_7, [1, 1, 32, -1, 2]), kwargs = {})
%aten_slice_copy_tensor_2 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.slice_copy.Tensor](args = (%aten_view_copy_default_10, 4, 0, 1), kwargs = {})
%aten_slice_copy_tensor_3 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.slice_copy.Tensor](args = (%aten_view_copy_default_10, 4, 1, 2), kwargs = {})
%aten_squeeze_copy_dims_2 : [num_users=2] = call_function[target=executorch.exir.dialects.edge._ops.aten.squeeze_copy.dims](args = (%aten_slice_copy_tensor_2, [4]), kwargs = {})
%aten_squeeze_copy_dims_3 : [num_users=2] = call_function[target=executorch.exir.dialects.edge._ops.aten.squeeze_copy.dims](args = (%aten_slice_copy_tensor_3, [4]), kwargs = {})
%aten_view_copy_default_11 : [num_users=4] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_index_tensor, [1, 1, 1, 64]), kwargs = {})
%aten_view_copy_default_12 : [num_users=4] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_index_tensor_1, [1, 1, 1, 64]), kwargs = {})
%aten_mul_tensor_3 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mul.Tensor](args = (%aten_squeeze_copy_dims, %aten_view_copy_default_11), kwargs = {})
%aten_mul_tensor_4 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mul.Tensor](args = (%aten_squeeze_copy_dims_1, %aten_view_copy_default_12), kwargs = {})
%aten_sub_tensor : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.sub.Tensor](args = (%aten_mul_tensor_3, %aten_mul_tensor_4), kwargs = {})
%aten_mul_tensor_5 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mul.Tensor](args = (%aten_squeeze_copy_dims, %aten_view_copy_default_12), kwargs = {})
%aten_mul_tensor_6 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mul.Tensor](args = (%aten_squeeze_copy_dims_1, %aten_view_copy_default_11), kwargs = {})
%aten_add_tensor_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%aten_mul_tensor_5, %aten_mul_tensor_6), kwargs = {})
%aten_mul_tensor_7 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mul.Tensor](args = (%aten_squeeze_copy_dims_2, %aten_view_copy_default_11), kwargs = {})
%aten_mul_tensor_8 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mul.Tensor](args = (%aten_squeeze_copy_dims_3, %aten_view_copy_default_12), kwargs = {})
%aten_sub_tensor_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.sub.Tensor](args = (%aten_mul_tensor_7, %aten_mul_tensor_8), kwargs = {})
%aten_mul_tensor_9 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mul.Tensor](args = (%aten_squeeze_copy_dims_2, %aten_view_copy_default_12), kwargs = {})
%aten_mul_tensor_10 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.mul.Tensor](args = (%aten_squeeze_copy_dims_3, %aten_view_copy_default_11), kwargs = {})
%aten_add_tensor_2 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.add.Tensor](args = (%aten_mul_tensor_9, %aten_mul_tensor_10), kwargs = {})
%aten_unsqueeze_copy_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.unsqueeze_copy.default](args = (%aten_sub_tensor, 4), kwargs = {})
%aten_unsqueeze_copy_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.unsqueeze_copy.default](args = (%aten_add_tensor_1, 4), kwargs = {})
%aten_cat_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.cat.default](args = ([%aten_unsqueeze_copy_default, %aten_unsqueeze_copy_default_1], -1), kwargs = {})
%aten_view_copy_default_13 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_cat_default, [1, 1, 32, 128]), kwargs = {})
%aten_unsqueeze_copy_default_2 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.unsqueeze_copy.default](args = (%aten_sub_tensor_1, 4), kwargs = {})
%aten_unsqueeze_copy_default_3 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.unsqueeze_copy.default](args = (%aten_add_tensor_2, 4), kwargs = {})
%aten_cat_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.cat.default](args = ([%aten_unsqueeze_copy_default_2, %aten_unsqueeze_copy_default_3], -1), kwargs = {})
%aten_view_copy_default_14 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_cat_default_1, [1, 1, 32, 128]), kwargs = {})
%aten_permute_copy_default_3 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.permute_copy.default](args = (%aten_view_copy_default_13, [0, 2, 1, 3]), kwargs = {})
%aten_permute_copy_default_4 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.permute_copy.default](args = (%aten_view_copy_default_14, [0, 2, 1, 3]), kwargs = {})
%aten_permute_copy_default_5 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.permute_copy.default](args = (%aten_view_copy_default_8, [0, 2, 1, 3]), kwargs = {})
%aten_view_copy_default_15 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_permute_copy_default_4, [32, 1, 128]), kwargs = {})
%aten_index_put_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.index_put.default](args = (%arg77_1, [None, None, %arg840_1], %aten_view_copy_default_15), kwargs = {})
%aten_slice_scatter_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.slice_scatter.default](args = (%arg77_1, %aten_index_put_default, 1, 0, 9223372036854775807), kwargs = {})
%aten_slice_scatter_default_1 : [num_users=2] = call_function[target=executorch.exir.dialects.edge._ops.aten.slice_scatter.default](args = (%arg77_1, %aten_slice_scatter_default, 0, 0, 9223372036854775807), kwargs = {})
%aten_view_copy_default_16 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.view_copy.default](args = (%aten_permute_copy_default_5, [32, 1, 128]), kwargs = {})
%aten_index_put_default_1 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.index_put.default](args = (%arg78_1, [None, None, %arg840_1], %aten_view_copy_default_16), kwargs = {})
%aten_slice_scatter_default_2 : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.slice_scatter.default](args = (%arg78_1, %aten_index_put_default_1, 1, 0, 9223372036854775807), kwargs = {})
%aten_slice_scatter_default_3 : [num_users=2] = call_function[target=executorch.exir.dialects.edge._ops.aten.slice_scatter.default](args = (%arg78_1, %aten_slice_scatter_default_2, 0, 0, 92233
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment