Skip to content

Instantly share code, notes, and snippets.

@justinchuby
Created March 31, 2024 04:35
Show Gist options
  • Save justinchuby/402f2dd254ec8f674cc4b7178626bf33 to your computer and use it in GitHub Desktop.
Save justinchuby/402f2dd254ec8f674cc4b7178626bf33 to your computer and use it in GitHub Desktop.
Model analysis
This file has been truncated, but you can view the full file.
<
ir_version=8,
opset_imports={'pkg.onnxscript.torch_lib': 1, 'pkg.torch.2.4.0a0+gitd56ab7b': 1, 'pkg.transformers.4.37.2': 1, '': 18, 'pkg.onnxscript.torch_lib.common': 1},
producer_name='pytorch',
producer_version='2.4.0',
domain=None,
model_version=None,
>
graph(
name=main_graph,
inputs=(
%"l_input_ids_"<INT64,[1,128]>
),
outputs=(
%"_to_copy_194"<FLOAT,[1,128,32000]>,
%"model_1_1"<FLOAT16,[1,8,128,128]>,
%"model_1"<FLOAT16,[1,8,128,128]>,
%"model_1_3"<FLOAT16,[1,8,128,128]>,
%"model_1_2"<FLOAT16,[1,8,128,128]>,
%"model_1_5"<FLOAT16,[1,8,128,128]>,
%"model_1_4"<FLOAT16,[1,8,128,128]>,
%"model_1_7"<FLOAT16,[1,8,128,128]>,
%"model_1_6"<FLOAT16,[1,8,128,128]>,
%"model_1_9"<FLOAT16,[1,8,128,128]>,
%"model_1_8"<FLOAT16,[1,8,128,128]>,
%"model_1_11"<FLOAT16,[1,8,128,128]>,
%"model_1_10"<FLOAT16,[1,8,128,128]>,
%"model_1_13"<FLOAT16,[1,8,128,128]>,
%"model_1_12"<FLOAT16,[1,8,128,128]>,
%"model_1_15"<FLOAT16,[1,8,128,128]>,
%"model_1_14"<FLOAT16,[1,8,128,128]>,
%"model_1_17"<FLOAT16,[1,8,128,128]>,
%"model_1_16"<FLOAT16,[1,8,128,128]>,
%"model_1_19"<FLOAT16,[1,8,128,128]>,
%"model_1_18"<FLOAT16,[1,8,128,128]>,
%"model_1_21"<FLOAT16,[1,8,128,128]>,
%"model_1_20"<FLOAT16,[1,8,128,128]>,
%"model_1_23"<FLOAT16,[1,8,128,128]>,
%"model_1_22"<FLOAT16,[1,8,128,128]>,
%"model_1_25"<FLOAT16,[1,8,128,128]>,
%"model_1_24"<FLOAT16,[1,8,128,128]>,
%"model_1_27"<FLOAT16,[1,8,128,128]>,
%"model_1_26"<FLOAT16,[1,8,128,128]>,
%"model_1_29"<FLOAT16,[1,8,128,128]>,
%"model_1_28"<FLOAT16,[1,8,128,128]>,
%"model_1_31"<FLOAT16,[1,8,128,128]>,
%"model_1_30"<FLOAT16,[1,8,128,128]>,
%"model_1_33"<FLOAT16,[1,8,128,128]>,
%"model_1_32"<FLOAT16,[1,8,128,128]>,
%"model_1_35"<FLOAT16,[1,8,128,128]>,
%"model_1_34"<FLOAT16,[1,8,128,128]>,
%"model_1_37"<FLOAT16,[1,8,128,128]>,
%"model_1_36"<FLOAT16,[1,8,128,128]>,
%"model_1_39"<FLOAT16,[1,8,128,128]>,
%"model_1_38"<FLOAT16,[1,8,128,128]>,
%"model_1_41"<FLOAT16,[1,8,128,128]>,
%"model_1_40"<FLOAT16,[1,8,128,128]>,
%"model_1_43"<FLOAT16,[1,8,128,128]>,
%"model_1_42"<FLOAT16,[1,8,128,128]>,
%"model_1_45"<FLOAT16,[1,8,128,128]>,
%"model_1_44"<FLOAT16,[1,8,128,128]>,
%"model_1_47"<FLOAT16,[1,8,128,128]>,
%"model_1_46"<FLOAT16,[1,8,128,128]>,
%"model_1_49"<FLOAT16,[1,8,128,128]>,
%"model_1_48"<FLOAT16,[1,8,128,128]>,
%"model_1_51"<FLOAT16,[1,8,128,128]>,
%"model_1_50"<FLOAT16,[1,8,128,128]>,
%"model_1_53"<FLOAT16,[1,8,128,128]>,
%"model_1_52"<FLOAT16,[1,8,128,128]>,
%"model_1_55"<FLOAT16,[1,8,128,128]>,
%"model_1_54"<FLOAT16,[1,8,128,128]>,
%"model_1_57"<FLOAT16,[1,8,128,128]>,
%"model_1_56"<FLOAT16,[1,8,128,128]>,
%"model_1_59"<FLOAT16,[1,8,128,128]>,
%"model_1_58"<FLOAT16,[1,8,128,128]>,
%"model_1_61"<FLOAT16,[1,8,128,128]>,
%"model_1_60"<FLOAT16,[1,8,128,128]>,
%"model_1_63"<FLOAT16,[1,8,128,128]>,
%"model_1_62"<FLOAT16,[1,8,128,128]>
),
initializers=(
ExternalTensor<FLOAT16,[32000,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.embed_tokens.weight', offset=0), length=262144000),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.0.input_layernorm.weight', offset=262144000), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.0.self_attn.q_proj.weight', offset=262152192), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.0.self_attn.k_proj.weight', offset=295706624), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.0.self_attn.v_proj.weight', offset=304095232), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.0.self_attn.rotary_emb.cos_cached', offset=312483840), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.0.self_attn.rotary_emb.sin_cached', offset=320872448), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.0.self_attn.o_proj.weight', offset=329261056), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.0.post_attention_layernorm.weight', offset=362815488), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.0.mlp.gate_proj.weight', offset=362823680), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.0.mlp.up_proj.weight', offset=480264192), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.0.mlp.down_proj.weight', offset=597704704), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.1.input_layernorm.weight', offset=715145216), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.1.self_attn.q_proj.weight', offset=715153408), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.1.self_attn.k_proj.weight', offset=748707840), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.1.self_attn.v_proj.weight', offset=757096448), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.1.self_attn.rotary_emb.cos_cached', offset=765485056), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.1.self_attn.rotary_emb.sin_cached', offset=773873664), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.1.self_attn.o_proj.weight', offset=782262272), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.1.post_attention_layernorm.weight', offset=815816704), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.1.mlp.gate_proj.weight', offset=815824896), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.1.mlp.up_proj.weight', offset=933265408), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.1.mlp.down_proj.weight', offset=1050705920), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.2.input_layernorm.weight', offset=1168146432), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.2.self_attn.q_proj.weight', offset=1168154624), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.2.self_attn.k_proj.weight', offset=1201709056), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.2.self_attn.v_proj.weight', offset=1210097664), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.2.self_attn.rotary_emb.cos_cached', offset=1218486272), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.2.self_attn.rotary_emb.sin_cached', offset=1226874880), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.2.self_attn.o_proj.weight', offset=1235263488), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.2.post_attention_layernorm.weight', offset=1268817920), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.2.mlp.gate_proj.weight', offset=1268826112), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.2.mlp.up_proj.weight', offset=1386266624), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.2.mlp.down_proj.weight', offset=1503707136), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.3.input_layernorm.weight', offset=1621147648), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.3.self_attn.q_proj.weight', offset=1621155840), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.3.self_attn.k_proj.weight', offset=1654710272), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.3.self_attn.v_proj.weight', offset=1663098880), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.3.self_attn.rotary_emb.cos_cached', offset=1671487488), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.3.self_attn.rotary_emb.sin_cached', offset=1679876096), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.3.self_attn.o_proj.weight', offset=1688264704), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.3.post_attention_layernorm.weight', offset=1721819136), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.3.mlp.gate_proj.weight', offset=1721827328), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.3.mlp.up_proj.weight', offset=1839267840), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.3.mlp.down_proj.weight', offset=1956708352), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.4.input_layernorm.weight', offset=2074148864), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.4.self_attn.q_proj.weight', offset=2074157056), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.4.self_attn.k_proj.weight', offset=2107711488), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.4.self_attn.v_proj.weight', offset=2116100096), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.4.self_attn.rotary_emb.cos_cached', offset=2124488704), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.4.self_attn.rotary_emb.sin_cached', offset=2132877312), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.4.self_attn.o_proj.weight', offset=2141265920), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.4.post_attention_layernorm.weight', offset=2174820352), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.4.mlp.gate_proj.weight', offset=2174828544), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.4.mlp.up_proj.weight', offset=2292269056), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.4.mlp.down_proj.weight', offset=2409709568), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.5.input_layernorm.weight', offset=2527150080), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.5.self_attn.q_proj.weight', offset=2527158272), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.5.self_attn.k_proj.weight', offset=2560712704), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.5.self_attn.v_proj.weight', offset=2569101312), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.5.self_attn.rotary_emb.cos_cached', offset=2577489920), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.5.self_attn.rotary_emb.sin_cached', offset=2585878528), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.5.self_attn.o_proj.weight', offset=2594267136), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.5.post_attention_layernorm.weight', offset=2627821568), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.5.mlp.gate_proj.weight', offset=2627829760), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.5.mlp.up_proj.weight', offset=2745270272), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.5.mlp.down_proj.weight', offset=2862710784), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.6.input_layernorm.weight', offset=2980151296), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.6.self_attn.q_proj.weight', offset=2980159488), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.6.self_attn.k_proj.weight', offset=3013713920), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.6.self_attn.v_proj.weight', offset=3022102528), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.6.self_attn.rotary_emb.cos_cached', offset=3030491136), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.6.self_attn.rotary_emb.sin_cached', offset=3038879744), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.6.self_attn.o_proj.weight', offset=3047268352), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.6.post_attention_layernorm.weight', offset=3080822784), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.6.mlp.gate_proj.weight', offset=3080830976), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.6.mlp.up_proj.weight', offset=3198271488), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.6.mlp.down_proj.weight', offset=3315712000), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.7.input_layernorm.weight', offset=3433152512), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.7.self_attn.q_proj.weight', offset=3433160704), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.7.self_attn.k_proj.weight', offset=3466715136), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.7.self_attn.v_proj.weight', offset=3475103744), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.7.self_attn.rotary_emb.cos_cached', offset=3483492352), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.7.self_attn.rotary_emb.sin_cached', offset=3491880960), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.7.self_attn.o_proj.weight', offset=3500269568), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.7.post_attention_layernorm.weight', offset=3533824000), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.7.mlp.gate_proj.weight', offset=3533832192), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.7.mlp.up_proj.weight', offset=3651272704), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.7.mlp.down_proj.weight', offset=3768713216), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.8.input_layernorm.weight', offset=3886153728), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.8.self_attn.q_proj.weight', offset=3886161920), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.8.self_attn.k_proj.weight', offset=3919716352), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.8.self_attn.v_proj.weight', offset=3928104960), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.8.self_attn.rotary_emb.cos_cached', offset=3936493568), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.8.self_attn.rotary_emb.sin_cached', offset=3944882176), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.8.self_attn.o_proj.weight', offset=3953270784), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.8.post_attention_layernorm.weight', offset=3986825216), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.8.mlp.gate_proj.weight', offset=3986833408), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.8.mlp.up_proj.weight', offset=4104273920), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.8.mlp.down_proj.weight', offset=4221714432), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.9.input_layernorm.weight', offset=4339154944), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.9.self_attn.q_proj.weight', offset=4339163136), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.9.self_attn.k_proj.weight', offset=4372717568), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.9.self_attn.v_proj.weight', offset=4381106176), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.9.self_attn.rotary_emb.cos_cached', offset=4389494784), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.9.self_attn.rotary_emb.sin_cached', offset=4397883392), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.9.self_attn.o_proj.weight', offset=4406272000), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.9.post_attention_layernorm.weight', offset=4439826432), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.9.mlp.gate_proj.weight', offset=4439834624), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.9.mlp.up_proj.weight', offset=4557275136), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.9.mlp.down_proj.weight', offset=4674715648), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.10.input_layernorm.weight', offset=4792156160), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.10.self_attn.q_proj.weight', offset=4792164352), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.10.self_attn.k_proj.weight', offset=4825718784), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.10.self_attn.v_proj.weight', offset=4834107392), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.10.self_attn.rotary_emb.cos_cached', offset=4842496000), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.10.self_attn.rotary_emb.sin_cached', offset=4850884608), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.10.self_attn.o_proj.weight', offset=4859273216), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.10.post_attention_layernorm.weight', offset=4892827648), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.10.mlp.gate_proj.weight', offset=4892835840), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.10.mlp.up_proj.weight', offset=5010276352), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.10.mlp.down_proj.weight', offset=5127716864), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.11.input_layernorm.weight', offset=5245157376), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.11.self_attn.q_proj.weight', offset=5245165568), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.11.self_attn.k_proj.weight', offset=5278720000), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.11.self_attn.v_proj.weight', offset=5287108608), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.11.self_attn.rotary_emb.cos_cached', offset=5295497216), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.11.self_attn.rotary_emb.sin_cached', offset=5303885824), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.11.self_attn.o_proj.weight', offset=5312274432), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.11.post_attention_layernorm.weight', offset=5345828864), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.11.mlp.gate_proj.weight', offset=5345837056), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.11.mlp.up_proj.weight', offset=5463277568), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.11.mlp.down_proj.weight', offset=5580718080), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.12.input_layernorm.weight', offset=5698158592), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.12.self_attn.q_proj.weight', offset=5698166784), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.12.self_attn.k_proj.weight', offset=5731721216), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.12.self_attn.v_proj.weight', offset=5740109824), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.12.self_attn.rotary_emb.cos_cached', offset=5748498432), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.12.self_attn.rotary_emb.sin_cached', offset=5756887040), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.12.self_attn.o_proj.weight', offset=5765275648), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.12.post_attention_layernorm.weight', offset=5798830080), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.12.mlp.gate_proj.weight', offset=5798838272), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.12.mlp.up_proj.weight', offset=5916278784), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.12.mlp.down_proj.weight', offset=6033719296), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.13.input_layernorm.weight', offset=6151159808), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.13.self_attn.q_proj.weight', offset=6151168000), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.13.self_attn.k_proj.weight', offset=6184722432), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.13.self_attn.v_proj.weight', offset=6193111040), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.13.self_attn.rotary_emb.cos_cached', offset=6201499648), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.13.self_attn.rotary_emb.sin_cached', offset=6209888256), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.13.self_attn.o_proj.weight', offset=6218276864), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.13.post_attention_layernorm.weight', offset=6251831296), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.13.mlp.gate_proj.weight', offset=6251839488), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.13.mlp.up_proj.weight', offset=6369280000), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.13.mlp.down_proj.weight', offset=6486720512), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.14.input_layernorm.weight', offset=6604161024), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.14.self_attn.q_proj.weight', offset=6604169216), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.14.self_attn.k_proj.weight', offset=6637723648), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.14.self_attn.v_proj.weight', offset=6646112256), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.14.self_attn.rotary_emb.cos_cached', offset=6654500864), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.14.self_attn.rotary_emb.sin_cached', offset=6662889472), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.14.self_attn.o_proj.weight', offset=6671278080), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.14.post_attention_layernorm.weight', offset=6704832512), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.14.mlp.gate_proj.weight', offset=6704840704), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.14.mlp.up_proj.weight', offset=6822281216), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.14.mlp.down_proj.weight', offset=6939721728), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.15.input_layernorm.weight', offset=7057162240), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.15.self_attn.q_proj.weight', offset=7057170432), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.15.self_attn.k_proj.weight', offset=7090724864), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.15.self_attn.v_proj.weight', offset=7099113472), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.15.self_attn.rotary_emb.cos_cached', offset=7107502080), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.15.self_attn.rotary_emb.sin_cached', offset=7115890688), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.15.self_attn.o_proj.weight', offset=7124279296), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.15.post_attention_layernorm.weight', offset=7157833728), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.15.mlp.gate_proj.weight', offset=7157841920), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.15.mlp.up_proj.weight', offset=7275282432), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.15.mlp.down_proj.weight', offset=7392722944), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.16.input_layernorm.weight', offset=7510163456), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.16.self_attn.q_proj.weight', offset=7510171648), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.16.self_attn.k_proj.weight', offset=7543726080), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.16.self_attn.v_proj.weight', offset=7552114688), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.16.self_attn.rotary_emb.cos_cached', offset=7560503296), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.16.self_attn.rotary_emb.sin_cached', offset=7568891904), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.16.self_attn.o_proj.weight', offset=7577280512), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.16.post_attention_layernorm.weight', offset=7610834944), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.16.mlp.gate_proj.weight', offset=7610843136), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.16.mlp.up_proj.weight', offset=7728283648), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.16.mlp.down_proj.weight', offset=7845724160), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.17.input_layernorm.weight', offset=7963164672), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.17.self_attn.q_proj.weight', offset=7963172864), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.17.self_attn.k_proj.weight', offset=7996727296), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.17.self_attn.v_proj.weight', offset=8005115904), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.17.self_attn.rotary_emb.cos_cached', offset=8013504512), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.17.self_attn.rotary_emb.sin_cached', offset=8021893120), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.17.self_attn.o_proj.weight', offset=8030281728), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.17.post_attention_layernorm.weight', offset=8063836160), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.17.mlp.gate_proj.weight', offset=8063844352), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.17.mlp.up_proj.weight', offset=8181284864), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.17.mlp.down_proj.weight', offset=8298725376), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.18.input_layernorm.weight', offset=8416165888), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.18.self_attn.q_proj.weight', offset=8416174080), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.18.self_attn.k_proj.weight', offset=8449728512), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.18.self_attn.v_proj.weight', offset=8458117120), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.18.self_attn.rotary_emb.cos_cached', offset=8466505728), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.18.self_attn.rotary_emb.sin_cached', offset=8474894336), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.18.self_attn.o_proj.weight', offset=8483282944), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.18.post_attention_layernorm.weight', offset=8516837376), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.18.mlp.gate_proj.weight', offset=8516845568), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.18.mlp.up_proj.weight', offset=8634286080), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.18.mlp.down_proj.weight', offset=8751726592), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.19.input_layernorm.weight', offset=8869167104), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.19.self_attn.q_proj.weight', offset=8869175296), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.19.self_attn.k_proj.weight', offset=8902729728), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.19.self_attn.v_proj.weight', offset=8911118336), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.19.self_attn.rotary_emb.cos_cached', offset=8919506944), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.19.self_attn.rotary_emb.sin_cached', offset=8927895552), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.19.self_attn.o_proj.weight', offset=8936284160), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.19.post_attention_layernorm.weight', offset=8969838592), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.19.mlp.gate_proj.weight', offset=8969846784), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.19.mlp.up_proj.weight', offset=9087287296), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.19.mlp.down_proj.weight', offset=9204727808), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.20.input_layernorm.weight', offset=9322168320), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.20.self_attn.q_proj.weight', offset=9322176512), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.20.self_attn.k_proj.weight', offset=9355730944), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.20.self_attn.v_proj.weight', offset=9364119552), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.20.self_attn.rotary_emb.cos_cached', offset=9372508160), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.20.self_attn.rotary_emb.sin_cached', offset=9380896768), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.20.self_attn.o_proj.weight', offset=9389285376), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.20.post_attention_layernorm.weight', offset=9422839808), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.20.mlp.gate_proj.weight', offset=9422848000), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.20.mlp.up_proj.weight', offset=9540288512), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.20.mlp.down_proj.weight', offset=9657729024), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.21.input_layernorm.weight', offset=9775169536), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.21.self_attn.q_proj.weight', offset=9775177728), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.21.self_attn.k_proj.weight', offset=9808732160), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.21.self_attn.v_proj.weight', offset=9817120768), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.21.self_attn.rotary_emb.cos_cached', offset=9825509376), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.21.self_attn.rotary_emb.sin_cached', offset=9833897984), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.21.self_attn.o_proj.weight', offset=9842286592), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.21.post_attention_layernorm.weight', offset=9875841024), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.21.mlp.gate_proj.weight', offset=9875849216), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.21.mlp.up_proj.weight', offset=9993289728), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.21.mlp.down_proj.weight', offset=10110730240), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.22.input_layernorm.weight', offset=10228170752), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.22.self_attn.q_proj.weight', offset=10228178944), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.22.self_attn.k_proj.weight', offset=10261733376), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.22.self_attn.v_proj.weight', offset=10270121984), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.22.self_attn.rotary_emb.cos_cached', offset=10278510592), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.22.self_attn.rotary_emb.sin_cached', offset=10286899200), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.22.self_attn.o_proj.weight', offset=10295287808), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.22.post_attention_layernorm.weight', offset=10328842240), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.22.mlp.gate_proj.weight', offset=10328850432), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.22.mlp.up_proj.weight', offset=10446290944), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.22.mlp.down_proj.weight', offset=10563731456), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.23.input_layernorm.weight', offset=10681171968), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.23.self_attn.q_proj.weight', offset=10681180160), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.23.self_attn.k_proj.weight', offset=10714734592), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.23.self_attn.v_proj.weight', offset=10723123200), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.23.self_attn.rotary_emb.cos_cached', offset=10731511808), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.23.self_attn.rotary_emb.sin_cached', offset=10739900416), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.23.self_attn.o_proj.weight', offset=10748289024), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.23.post_attention_layernorm.weight', offset=10781843456), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.23.mlp.gate_proj.weight', offset=10781851648), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.23.mlp.up_proj.weight', offset=10899292160), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.23.mlp.down_proj.weight', offset=11016732672), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.24.input_layernorm.weight', offset=11134173184), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.24.self_attn.q_proj.weight', offset=11134181376), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.24.self_attn.k_proj.weight', offset=11167735808), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.24.self_attn.v_proj.weight', offset=11176124416), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.24.self_attn.rotary_emb.cos_cached', offset=11184513024), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.24.self_attn.rotary_emb.sin_cached', offset=11192901632), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.24.self_attn.o_proj.weight', offset=11201290240), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.24.post_attention_layernorm.weight', offset=11234844672), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.24.mlp.gate_proj.weight', offset=11234852864), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.24.mlp.up_proj.weight', offset=11352293376), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.24.mlp.down_proj.weight', offset=11469733888), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.25.input_layernorm.weight', offset=11587174400), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.25.self_attn.q_proj.weight', offset=11587182592), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.25.self_attn.k_proj.weight', offset=11620737024), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.25.self_attn.v_proj.weight', offset=11629125632), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.25.self_attn.rotary_emb.cos_cached', offset=11637514240), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.25.self_attn.rotary_emb.sin_cached', offset=11645902848), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.25.self_attn.o_proj.weight', offset=11654291456), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.25.post_attention_layernorm.weight', offset=11687845888), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.25.mlp.gate_proj.weight', offset=11687854080), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.25.mlp.up_proj.weight', offset=11805294592), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.25.mlp.down_proj.weight', offset=11922735104), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.26.input_layernorm.weight', offset=12040175616), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.26.self_attn.q_proj.weight', offset=12040183808), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.26.self_attn.k_proj.weight', offset=12073738240), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.26.self_attn.v_proj.weight', offset=12082126848), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.26.self_attn.rotary_emb.cos_cached', offset=12090515456), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.26.self_attn.rotary_emb.sin_cached', offset=12098904064), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.26.self_attn.o_proj.weight', offset=12107292672), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.26.post_attention_layernorm.weight', offset=12140847104), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.26.mlp.gate_proj.weight', offset=12140855296), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.26.mlp.up_proj.weight', offset=12258295808), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.26.mlp.down_proj.weight', offset=12375736320), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.27.input_layernorm.weight', offset=12493176832), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.27.self_attn.q_proj.weight', offset=12493185024), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.27.self_attn.k_proj.weight', offset=12526739456), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.27.self_attn.v_proj.weight', offset=12535128064), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.27.self_attn.rotary_emb.cos_cached', offset=12543516672), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.27.self_attn.rotary_emb.sin_cached', offset=12551905280), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.27.self_attn.o_proj.weight', offset=12560293888), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.27.post_attention_layernorm.weight', offset=12593848320), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.27.mlp.gate_proj.weight', offset=12593856512), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.27.mlp.up_proj.weight', offset=12711297024), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.27.mlp.down_proj.weight', offset=12828737536), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.28.input_layernorm.weight', offset=12946178048), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.28.self_attn.q_proj.weight', offset=12946186240), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.28.self_attn.k_proj.weight', offset=12979740672), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.28.self_attn.v_proj.weight', offset=12988129280), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.28.self_attn.rotary_emb.cos_cached', offset=12996517888), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.28.self_attn.rotary_emb.sin_cached', offset=13004906496), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.28.self_attn.o_proj.weight', offset=13013295104), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.28.post_attention_layernorm.weight', offset=13046849536), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.28.mlp.gate_proj.weight', offset=13046857728), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.28.mlp.up_proj.weight', offset=13164298240), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.28.mlp.down_proj.weight', offset=13281738752), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.29.input_layernorm.weight', offset=13399179264), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.29.self_attn.q_proj.weight', offset=13399187456), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.29.self_attn.k_proj.weight', offset=13432741888), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.29.self_attn.v_proj.weight', offset=13441130496), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.29.self_attn.rotary_emb.cos_cached', offset=13449519104), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.29.self_attn.rotary_emb.sin_cached', offset=13457907712), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.29.self_attn.o_proj.weight', offset=13466296320), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.29.post_attention_layernorm.weight', offset=13499850752), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.29.mlp.gate_proj.weight', offset=13499858944), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.29.mlp.up_proj.weight', offset=13617299456), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.29.mlp.down_proj.weight', offset=13734739968), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.30.input_layernorm.weight', offset=13852180480), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.30.self_attn.q_proj.weight', offset=13852188672), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.30.self_attn.k_proj.weight', offset=13885743104), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.30.self_attn.v_proj.weight', offset=13894131712), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.30.self_attn.rotary_emb.cos_cached', offset=13902520320), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.30.self_attn.rotary_emb.sin_cached', offset=13910908928), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.30.self_attn.o_proj.weight', offset=13919297536), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.30.post_attention_layernorm.weight', offset=13952851968), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.30.mlp.gate_proj.weight', offset=13952860160), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.30.mlp.up_proj.weight', offset=14070300672), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.30.mlp.down_proj.weight', offset=14187741184), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.31.input_layernorm.weight', offset=14305181696), length=8192),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.31.self_attn.q_proj.weight', offset=14305189888), length=33554432),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.31.self_attn.k_proj.weight', offset=14338744320), length=8388608),
ExternalTensor<FLOAT16,[1024,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.31.self_attn.v_proj.weight', offset=14347132928), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.31.self_attn.rotary_emb.cos_cached', offset=14355521536), length=8388608),
ExternalTensor<FLOAT16,[32768,128]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.31.self_attn.rotary_emb.sin_cached', offset=14363910144), length=8388608),
ExternalTensor<FLOAT16,[4096,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.31.self_attn.o_proj.weight', offset=14372298752), length=33554432),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.31.post_attention_layernorm.weight', offset=14405853184), length=8192),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.31.mlp.gate_proj.weight', offset=14405861376), length=117440512),
ExternalTensor<FLOAT16,[14336,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.31.mlp.up_proj.weight', offset=14523301888), length=117440512),
ExternalTensor<FLOAT16,[4096,14336]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.layers.31.mlp.down_proj.weight', offset=14640742400), length=117440512),
ExternalTensor<FLOAT16,[4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='model.norm.weight', offset=14758182912), length=8192),
ExternalTensor<FLOAT16,[32000,4096]>(path='e881dec8-e6ee-11ee-8118-0242ac110002', name='lm_head.weight', offset=14758191104), length=262144000)
),
) {
0 | # Constant_4__1
%"_val_1__1"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1 | # Cast_5__1
%"_val_2__1"<INT64,?> ⬅️ ::Cast(%"_val_1__1") {to=7}
2 | # Constant_6__1
%"_val_3__1"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3 | # Cast_7__1
%"_val_4__1"<INT64,?> ⬅️ ::Cast(%"_val_3__1") {to=7}
4 | # Constant_8__1
%"_val_5__1"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5 | # Cast_9__1
%"_val_6__1"<INT64,?> ⬅️ ::Cast(%"_val_5__1") {to=7}
6 | # Range_10__1
%"arange__1"<INT64,[unk__0]> ⬅️ ::Range(%"_val_4__1", %"_val_2__1", %"_val_6__1")
7 | # n0__2
%"dim__2"<INT64,?> ⬅️ ::Constant() {value_int=0}
8 | # n1__2
%"dim_0__2"<INT64,?> ⬅️ ::Cast(%"dim__2") {to=7}
9 | # n2__2
%"unsqueeze__1"<INT64,?> ⬅️ ::Unsqueeze(%"arange__1", %"dim_0__2")
10 | # Constant_12__1
%"_val_9__1"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
11 | # n0__3
%"size_0__3"<INT64,[2]> ⬅️ ::Cast(%"_val_9__1") {to=7}
12 | # n1__3
%"view__1"<INT64,[unk__1,unk__2]> ⬅️ ::Reshape(%"unsqueeze__1", %"size_0__3")
13 | # n0__5
%"model_embed_tokens_1__1"<FLOAT16,[1,128,4096]> ⬅️ ::Gather(%"model.embed_tokens.weight", %"l_input_ids_")
14 | # Cast_3__7
%"_to_copy__7"<FLOAT,[1,128,4096]> ⬅️ ::Cast(%"model_embed_tokens_1__1") {to=1}
15 | # Constant_4__7
%"_val_2__7"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
16 | # Cast_5__7
%"scalar_tensor_default__7"<FLOAT,?> ⬅️ ::Cast(%"_val_2__7") {to=1}
17 | # n0__8
%"pow_1__7"<FLOAT,[1,128,4096]> ⬅️ ::Pow(%"_to_copy__7", %"scalar_tensor_default__7")
18 | # Constant_7__7
%"_val_5__7"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
19 | # n0__10
%"tmp__10"<INT64,[3]> ⬅️ ::Shape(%"pow_1__7")
20 | # n1__10
%"tmp_0__10"<INT64,?> ⬅️ ::Size(%"tmp__10")
21 | # n2__10
%"tmp_1__10"<INT64,?> ⬅️ ::Constant() {value_int=0}
22 | # n3__10
%"cond__9"<BOOL,?> ⬅️ ::Equal(%"tmp_0__10", %"tmp_1__10")
23 | # n1__9
%"mean__7"<FLOAT,?> ⬅️ ::If(%"cond__9") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__9"<FLOAT,[1,128,4096]>
),
) {
0 | # n0__9_0
%"result__9"<FLOAT,[1,128,4096]> ⬅️ ::Identity(%"pow_1__7")
return %"result__9"<FLOAT,[1,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__9"<FLOAT,?>
),
) {
0 | # n0__11
%"tmp__11"<INT64,[1]> ⬅️ ::Shape(%"_val_5__7")
1 | # n1__11
%"tmp_0__11"<INT64,?> ⬅️ ::Size(%"tmp__11")
2 | # n2__11
%"tmp_1__11"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__11
%"cond_0__9"<BOOL,?> ⬅️ ::Equal(%"tmp_0__11", %"tmp_1__11")
4 | # n1__9_2
%"dim_3__9"<INT64,?> ⬅️ ::If(%"cond_0__9") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__9"<INT64,[1,1]>
),
) {
0 | # n0__9_3
%"int64_0__9"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__9_4
%"dim_1__9"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__7", %"int64_0__9")
return %"dim_1__9"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__9"<INT64,[1]>
),
) {
0 | # n0__9_5
%"dim_2__9"<INT64,[1]> ⬅️ ::Identity(%"_val_5__7")
return %"dim_2__9"<INT64,[1]>
}}
5 | # n2__9
%"result_4__9"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_1__7", %"dim_3__9") {keepdims=1}
return %"result_4__9"<FLOAT,?>
}}
24 | # Constant_9__7
%"_val_7__7"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
25 | # n0__12
%"alpha__12"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
26 | # n1__12
%"alpha_0__12"<FLOAT,?> ⬅️ ::CastLike(%"alpha__12", %"_val_7__7")
27 | # n2__12
%"other_1__12"<FLOAT,?> ⬅️ ::Mul(%"_val_7__7", %"alpha_0__12")
28 | # n3__12
%"add__7"<FLOAT,?> ⬅️ ::Add(%"mean__7", %"other_1__12")
29 | # n0__13
%"tmp__13"<FLOAT,?> ⬅️ ::Sqrt(%"add__7")
30 | # n1__13
%"rsqrt__7"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__13")
31 | # n0__14
%"mul__7"<FLOAT,?> ⬅️ ::Mul(%"_to_copy__7", %"rsqrt__7")
32 | # Cast_13__7
%"_to_copy_1__7"<FLOAT16,?> ⬅️ ::Cast(%"mul__7") {to=10}
33 | # n0__15
%"model_layers_0_input_layernorm_1__6"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.0.input_layernorm.weight", %"_to_copy_1__7")
34 | # n0__19
%"tmp__19"<INT64,[2]> ⬅️ ::Shape(%"model.layers.0.self_attn.q_proj.weight")
35 | # n1__19
%"rank__18"<INT64,?> ⬅️ ::Size(%"tmp__19")
36 | # n1__18
%"int64_2__18"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
37 | # n2__18
%"int64_2_cast__18"<INT64,?> ⬅️ ::CastLike(%"int64_2__18", %"rank__18")
38 | # n3__18
%"cond__18"<BOOL,?> ⬅️ ::Equal(%"rank__18", %"int64_2_cast__18")
39 | # n4__18
%"t__17"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__18") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__18"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__18_6
%"result__18"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.0.self_attn.q_proj.weight") {perm=[1, 0]}
return %"result__18"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__18"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__18_7
%"result_0__18"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.0.self_attn.q_proj.weight")
return %"result_0__18"<FLOAT16,[4096,4096]>
}}
40 | # Constant_3__17
%"_val_3__17"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
41 | # n0__20
%"size_0__20"<INT64,[2]> ⬅️ ::Cast(%"_val_3__17") {to=7}
42 | # n1__20
%"view_1__17"<FLOAT16,[unk__3,unk__4]> ⬅️ ::Reshape(%"model_layers_0_input_layernorm_1__6", %"size_0__20")
43 | # n0__21
%"mm__17"<FLOAT16,[unk__3,4096]> ⬅️ ::MatMul(%"view_1__17", %"t__17")
44 | # Constant_6__17
%"_val_6__17"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
45 | # n0__22
%"size_0__22"<INT64,[3]> ⬅️ ::Cast(%"_val_6__17") {to=7}
46 | # n1__22
%"model_layers_0_self_attn_q_proj_1__16"<FLOAT16,[unk__5,unk__6,unk__7]> ⬅️ ::Reshape(%"mm__17", %"size_0__22")
47 | # n0__25
%"tmp__25"<INT64,[2]> ⬅️ ::Shape(%"model.layers.0.self_attn.k_proj.weight")
48 | # n1__25
%"rank__24"<INT64,?> ⬅️ ::Size(%"tmp__25")
49 | # n1__24
%"int64_2__24"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
50 | # n2__24
%"int64_2_cast__24"<INT64,?> ⬅️ ::CastLike(%"int64_2__24", %"rank__24")
51 | # n3__24
%"cond__24"<BOOL,?> ⬅️ ::Equal(%"rank__24", %"int64_2_cast__24")
52 | # n4__24
%"t_1__23"<FLOAT16,[unk__8,unk__9]> ⬅️ ::If(%"cond__24") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__24"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__24_8
%"result__24"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.0.self_attn.k_proj.weight") {perm=[1, 0]}
return %"result__24"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__24"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__24_9
%"result_0__24"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.0.self_attn.k_proj.weight")
return %"result_0__24"<FLOAT16,[1024,4096]>
}}
53 | # Constant_3__23
%"_val_3__23"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
54 | # n0__26
%"size_0__26"<INT64,[2]> ⬅️ ::Cast(%"_val_3__23") {to=7}
55 | # n1__26
%"view_3__23"<FLOAT16,[unk__10,unk__11]> ⬅️ ::Reshape(%"model_layers_0_input_layernorm_1__6", %"size_0__26")
56 | # n0__27
%"mm_1__23"<FLOAT16,[unk__10,unk__9]> ⬅️ ::MatMul(%"view_3__23", %"t_1__23")
57 | # Constant_6__23
%"_val_6__23"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
58 | # n0__28
%"size_0__28"<INT64,[3]> ⬅️ ::Cast(%"_val_6__23") {to=7}
59 | # n1__28
%"model_layers_0_self_attn_k_proj_1__16"<FLOAT16,[unk__12,unk__13,unk__14]> ⬅️ ::Reshape(%"mm_1__23", %"size_0__28")
60 | # n0__31
%"tmp__31"<INT64,[2]> ⬅️ ::Shape(%"model.layers.0.self_attn.v_proj.weight")
61 | # n1__31
%"rank__30"<INT64,?> ⬅️ ::Size(%"tmp__31")
62 | # n1__30
%"int64_2__30"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
63 | # n2__30
%"int64_2_cast__30"<INT64,?> ⬅️ ::CastLike(%"int64_2__30", %"rank__30")
64 | # n3__30
%"cond__30"<BOOL,?> ⬅️ ::Equal(%"rank__30", %"int64_2_cast__30")
65 | # n4__30
%"t_2__29"<FLOAT16,[unk__15,unk__16]> ⬅️ ::If(%"cond__30") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__30"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__30_10
%"result__30"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.0.self_attn.v_proj.weight") {perm=[1, 0]}
return %"result__30"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__30"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__30_11
%"result_0__30"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.0.self_attn.v_proj.weight")
return %"result_0__30"<FLOAT16,[1024,4096]>
}}
66 | # Constant_3__29
%"_val_3__29"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
67 | # n0__32
%"size_0__32"<INT64,[2]> ⬅️ ::Cast(%"_val_3__29") {to=7}
68 | # n1__32
%"view_5__29"<FLOAT16,[unk__17,unk__18]> ⬅️ ::Reshape(%"model_layers_0_input_layernorm_1__6", %"size_0__32")
69 | # n0__33
%"mm_2__29"<FLOAT16,[unk__17,unk__16]> ⬅️ ::MatMul(%"view_5__29", %"t_2__29")
70 | # Constant_6__29
%"_val_6__29"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
71 | # n0__34
%"size_0__34"<INT64,[3]> ⬅️ ::Cast(%"_val_6__29") {to=7}
72 | # n1__34
%"model_layers_0_self_attn_v_proj_1__16"<FLOAT16,[unk__19,unk__20,unk__21]> ⬅️ ::Reshape(%"mm_2__29", %"size_0__34")
73 | # Constant_61__16
%"_val_8__16"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
74 | # n0__35
%"size_0__35"<INT64,[4]> ⬅️ ::Cast(%"_val_8__16") {to=7}
75 | # n1__35
%"view_7__16"<FLOAT16,[unk__22,unk__23,unk__24,unk__25]> ⬅️ ::Reshape(%"model_layers_0_self_attn_q_proj_1__16", %"size_0__35")
76 | # Transpose_63__16
%"transpose__16"<FLOAT16,[unk__22,unk__24,unk__23,unk__25]> ⬅️ ::Transpose(%"view_7__16") {perm=[0, 2, 1, 3]}
77 | # Constant_64__16
%"_val_11__16"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
78 | # n0__36
%"size_0__36"<INT64,[4]> ⬅️ ::Cast(%"_val_11__16") {to=7}
79 | # n1__36
%"view_8__16"<FLOAT16,[unk__26,unk__27,unk__28,unk__29]> ⬅️ ::Reshape(%"model_layers_0_self_attn_k_proj_1__16", %"size_0__36")
80 | # Transpose_66__16
%"transpose_1__16"<FLOAT16,[unk__26,unk__28,unk__27,unk__29]> ⬅️ ::Transpose(%"view_8__16") {perm=[0, 2, 1, 3]}
81 | # Constant_67__16
%"_val_14__16"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
82 | # n0__37
%"size_0__37"<INT64,[4]> ⬅️ ::Cast(%"_val_14__16") {to=7}
83 | # n1__37
%"view_9__16"<FLOAT16,[unk__30,unk__31,unk__32,unk__33]> ⬅️ ::Reshape(%"model_layers_0_self_attn_v_proj_1__16", %"size_0__37")
84 | # Transpose_69__16
%"model_1"<FLOAT16,[1,8,128,128]> ⬅️ ::Transpose(%"view_9__16") {perm=[0, 2, 1, 3]}
85 | # Constant_8__38
%"_val_1__38"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
86 | # Cast_9__38
%"_val_2__38"<INT64,?> ⬅️ ::Cast(%"_val_1__38") {to=7}
87 | # Constant_10__38
%"_val_3__38"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
88 | # Reshape_11__38
%"_val_4__38"<INT64,[1]> ⬅️ ::Reshape(%"_val_2__38", %"_val_3__38") {allowzero=0}
89 | # Constant_12__38
%"_val_5__38"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
90 | # Cast_13__38
%"_val_6__38"<INT64,?> ⬅️ ::Cast(%"_val_5__38") {to=7}
91 | # Constant_14__38
%"_val_7__38"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
92 | # Reshape_15__38
%"_val_8__38"<INT64,[1]> ⬅️ ::Reshape(%"_val_6__38", %"_val_7__38") {allowzero=0}
93 | # Constant_16__38
%"_val_9__38"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
94 | # Cast_17__38
%"_val_10__38"<INT64,?> ⬅️ ::Cast(%"_val_9__38") {to=7}
95 | # Constant_18__38
%"_val_11__38"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
96 | # Reshape_19__38
%"_val_12__38"<INT64,[1]> ⬅️ ::Reshape(%"_val_10__38", %"_val_11__38") {allowzero=0}
97 | # Constant_20__38
%"_val_13__38"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
98 | # Cast_21__38
%"_val_14__38"<INT64,?> ⬅️ ::Cast(%"_val_13__38") {to=7}
99 | # Constant_22__38
%"_val_15__38"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
100 | # Reshape_23__38
%"_val_16__38"<INT64,[1]> ⬅️ ::Reshape(%"_val_14__38", %"_val_15__38") {allowzero=0}
101 | # Slice_24__38
%"model_layers_0_self_attn_rotary_emb_1__16"<FLOAT16,[unk__34,unk__35]> ⬅️ ::Slice(%"model.layers.0.self_attn.rotary_emb.cos_cached", %"_val_4__38", %"_val_8__38", %"_val_12__38", %"_val_16__38")
102 | # Constant_25__38
%"_val_19__38"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
103 | # Cast_26__38
%"_val_20__38"<INT64,?> ⬅️ ::Cast(%"_val_19__38") {to=7}
104 | # Constant_27__38
%"_val_21__38"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
105 | # Reshape_28__38
%"_val_22__38"<INT64,[1]> ⬅️ ::Reshape(%"_val_20__38", %"_val_21__38") {allowzero=0}
106 | # Constant_29__38
%"_val_23__38"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
107 | # Cast_30__38
%"_val_24__38"<INT64,?> ⬅️ ::Cast(%"_val_23__38") {to=7}
108 | # Constant_31__38
%"_val_25__38"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
109 | # Reshape_32__38
%"_val_26__38"<INT64,[1]> ⬅️ ::Reshape(%"_val_24__38", %"_val_25__38") {allowzero=0}
110 | # Constant_33__38
%"_val_27__38"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
111 | # Cast_34__38
%"_val_28__38"<INT64,?> ⬅️ ::Cast(%"_val_27__38") {to=7}
112 | # Constant_35__38
%"_val_29__38"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
113 | # Reshape_36__38
%"_val_30__38"<INT64,[1]> ⬅️ ::Reshape(%"_val_28__38", %"_val_29__38") {allowzero=0}
114 | # Constant_37__38
%"_val_31__38"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
115 | # Cast_38__38
%"_val_32__38"<INT64,?> ⬅️ ::Cast(%"_val_31__38") {to=7}
116 | # Constant_39__38
%"_val_33__38"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
117 | # Reshape_40__38
%"_val_34__38"<INT64,[1]> ⬅️ ::Reshape(%"_val_32__38", %"_val_33__38") {allowzero=0}
118 | # Slice_41__38
%"model_layers_0_self_attn_rotary_emb_1_1__16"<FLOAT16,[unk__36,unk__37]> ⬅️ ::Slice(%"model.layers.0.self_attn.rotary_emb.sin_cached", %"_val_22__38", %"_val_26__38", %"_val_30__38", %"_val_34__38")
119 | # Transpose_71__16
%"_val_21__16"<FLOAT16,[unk__34,unk__35]> ⬅️ ::Transpose(%"model_layers_0_self_attn_rotary_emb_1__16") {perm=[0, 1]}
120 | # Max_72__16
%"_val_22__16"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
121 | # Shape_73__16
%"_val_23__16"<INT64,[2]> ⬅️ ::Shape(%"_val_22__16") {start=0}
122 | # Expand_74__16
%"_val_24__16"<INT64,[unk__38,unk__39]> ⬅️ ::Expand(%"view__1", %"_val_23__16")
123 | # Constant_75__16
%"_val_25__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
124 | # Unsqueeze_76__16
%"_val_26__16"<INT64,[unk__38,unk__39,1]> ⬅️ ::Unsqueeze(%"_val_24__16", %"_val_25__16")
125 | # Concat_77__16
%"_val_27__16"<INT64,[unk__38,unk__39,1]> ⬅️ ::Concat(%"_val_26__16") {axis=-1}
126 | # GatherND_78__16
%"_val_28__16"<FLOAT16,[unk__38,unk__39,unk__35]> ⬅️ ::GatherND(%"_val_21__16", %"_val_27__16") {batch_dims=0}
127 | # Transpose_79__16
%"index__16"<FLOAT16,[unk__38,unk__39,unk__35]> ⬅️ ::Transpose(%"_val_28__16") {perm=[0, 1, 2]}
128 | # n0__39
%"dim__39"<INT64,?> ⬅️ ::Constant() {value_int=1}
129 | # n1__39
%"dim_0__39"<INT64,?> ⬅️ ::Cast(%"dim__39") {to=7}
130 | # n2__39
%"unsqueeze_1__16"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index__16", %"dim_0__39")
131 | # Transpose_81__16
%"_val_31__16"<FLOAT16,[unk__36,unk__37]> ⬅️ ::Transpose(%"model_layers_0_self_attn_rotary_emb_1_1__16") {perm=[0, 1]}
132 | # Max_82__16
%"_val_32__16"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
133 | # Shape_83__16
%"_val_33__16"<INT64,[2]> ⬅️ ::Shape(%"_val_32__16") {start=0}
134 | # Expand_84__16
%"_val_34__16"<INT64,[unk__40,unk__41]> ⬅️ ::Expand(%"view__1", %"_val_33__16")
135 | # Constant_85__16
%"_val_35__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
136 | # Unsqueeze_86__16
%"_val_36__16"<INT64,[unk__40,unk__41,1]> ⬅️ ::Unsqueeze(%"_val_34__16", %"_val_35__16")
137 | # Concat_87__16
%"_val_37__16"<INT64,[unk__40,unk__41,1]> ⬅️ ::Concat(%"_val_36__16") {axis=-1}
138 | # GatherND_88__16
%"_val_38__16"<FLOAT16,[unk__40,unk__41,unk__37]> ⬅️ ::GatherND(%"_val_31__16", %"_val_37__16") {batch_dims=0}
139 | # Transpose_89__16
%"index_1__16"<FLOAT16,[unk__40,unk__41,unk__37]> ⬅️ ::Transpose(%"_val_38__16") {perm=[0, 1, 2]}
140 | # n0__40
%"dim__40"<INT64,?> ⬅️ ::Constant() {value_int=1}
141 | # n1__40
%"dim_0__40"<INT64,?> ⬅️ ::Cast(%"dim__40") {to=7}
142 | # n2__40
%"unsqueeze_2__16"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_1__16", %"dim_0__40")
143 | # n0__41
%"mul_2__16"<FLOAT16,?> ⬅️ ::Mul(%"transpose__16", %"unsqueeze_1__16")
144 | # Constant_92__16
%"_val_42__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
145 | # Cast_93__16
%"_val_43__16"<INT64,?> ⬅️ ::Cast(%"_val_42__16") {to=7}
146 | # Constant_94__16
%"_val_44__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
147 | # Reshape_95__16
%"_val_45__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_43__16", %"_val_44__16") {allowzero=0}
148 | # Constant_96__16
%"_val_46__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
149 | # Cast_97__16
%"_val_47__16"<INT64,?> ⬅️ ::Cast(%"_val_46__16") {to=7}
150 | # Constant_98__16
%"_val_48__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
151 | # Reshape_99__16
%"_val_49__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_47__16", %"_val_48__16") {allowzero=0}
152 | # Constant_100__16
%"_val_50__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
153 | # Cast_101__16
%"_val_51__16"<INT64,?> ⬅️ ::Cast(%"_val_50__16") {to=7}
154 | # Constant_102__16
%"_val_52__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
155 | # Reshape_103__16
%"_val_53__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_51__16", %"_val_52__16") {allowzero=0}
156 | # Constant_104__16
%"_val_54__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
157 | # Cast_105__16
%"_val_55__16"<INT64,?> ⬅️ ::Cast(%"_val_54__16") {to=7}
158 | # Constant_106__16
%"_val_56__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
159 | # Reshape_107__16
%"_val_57__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_55__16", %"_val_56__16") {allowzero=0}
160 | # Slice_108__16
%"slice_3__16"<FLOAT16,[unk__42,unk__43,unk__44,unk__45]> ⬅️ ::Slice(%"transpose__16", %"_val_45__16", %"_val_49__16", %"_val_53__16", %"_val_57__16")
161 | # Constant_109__16
%"_val_59__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
162 | # Cast_110__16
%"_val_60__16"<INT64,?> ⬅️ ::Cast(%"_val_59__16") {to=7}
163 | # Constant_111__16
%"_val_61__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
164 | # Reshape_112__16
%"_val_62__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_60__16", %"_val_61__16") {allowzero=0}
165 | # Constant_113__16
%"_val_63__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
166 | # Cast_114__16
%"_val_64__16"<INT64,?> ⬅️ ::Cast(%"_val_63__16") {to=7}
167 | # Constant_115__16
%"_val_65__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
168 | # Reshape_116__16
%"_val_66__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_64__16", %"_val_65__16") {allowzero=0}
169 | # Constant_117__16
%"_val_67__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
170 | # Cast_118__16
%"_val_68__16"<INT64,?> ⬅️ ::Cast(%"_val_67__16") {to=7}
171 | # Constant_119__16
%"_val_69__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
172 | # Reshape_120__16
%"_val_70__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_68__16", %"_val_69__16") {allowzero=0}
173 | # Constant_121__16
%"_val_71__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
174 | # Cast_122__16
%"_val_72__16"<INT64,?> ⬅️ ::Cast(%"_val_71__16") {to=7}
175 | # Constant_123__16
%"_val_73__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
176 | # Reshape_124__16
%"_val_74__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_72__16", %"_val_73__16") {allowzero=0}
177 | # Slice_125__16
%"slice_4__16"<FLOAT16,[unk__46,unk__47,unk__48,unk__49]> ⬅️ ::Slice(%"transpose__16", %"_val_62__16", %"_val_66__16", %"_val_70__16", %"_val_74__16")
178 | # n0__42
%"neg__16"<FLOAT16,[unk__46,unk__47,unk__48,unk__49]> ⬅️ ::Neg(%"slice_4__16")
179 | # SequenceConstruct_127__16
%"77__16"<Sequence(Tensor(FLOAT16)),[unk__50,unk__51,unk__52,unk__53]> ⬅️ ::SequenceConstruct(%"neg__16", %"slice_3__16")
180 | # n0__43
%"cat__16"<FLOAT16,[unk__50,unk__51,unk__52,unk__54]> ⬅️ ::ConcatFromSequence(%"77__16") {axis=-1}
181 | # n0__44
%"mul_3__16"<FLOAT16,?> ⬅️ ::Mul(%"cat__16", %"unsqueeze_2__16")
182 | # n0__45
%"alpha__45"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
183 | # n1__45
%"alpha_0__45"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__45", %"mul_3__16")
184 | # n2__45
%"other_1__45"<FLOAT16,?> ⬅️ ::Mul(%"mul_3__16", %"alpha_0__45")
185 | # n3__45
%"add_1__16"<FLOAT16,?> ⬅️ ::Add(%"mul_2__16", %"other_1__45")
186 | # n0__46
%"mul_4__16"<FLOAT16,?> ⬅️ ::Mul(%"transpose_1__16", %"unsqueeze_1__16")
187 | # Constant_132__16
%"_val_82__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
188 | # Cast_133__16
%"_val_83__16"<INT64,?> ⬅️ ::Cast(%"_val_82__16") {to=7}
189 | # Constant_134__16
%"_val_84__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
190 | # Reshape_135__16
%"_val_85__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_83__16", %"_val_84__16") {allowzero=0}
191 | # Constant_136__16
%"_val_86__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
192 | # Cast_137__16
%"_val_87__16"<INT64,?> ⬅️ ::Cast(%"_val_86__16") {to=7}
193 | # Constant_138__16
%"_val_88__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
194 | # Reshape_139__16
%"_val_89__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_87__16", %"_val_88__16") {allowzero=0}
195 | # Constant_140__16
%"_val_90__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
196 | # Cast_141__16
%"_val_91__16"<INT64,?> ⬅️ ::Cast(%"_val_90__16") {to=7}
197 | # Constant_142__16
%"_val_92__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
198 | # Reshape_143__16
%"_val_93__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_91__16", %"_val_92__16") {allowzero=0}
199 | # Constant_144__16
%"_val_94__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
200 | # Cast_145__16
%"_val_95__16"<INT64,?> ⬅️ ::Cast(%"_val_94__16") {to=7}
201 | # Constant_146__16
%"_val_96__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
202 | # Reshape_147__16
%"_val_97__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_95__16", %"_val_96__16") {allowzero=0}
203 | # Slice_148__16
%"slice_5__16"<FLOAT16,[unk__55,unk__56,unk__57,unk__58]> ⬅️ ::Slice(%"transpose_1__16", %"_val_85__16", %"_val_89__16", %"_val_93__16", %"_val_97__16")
204 | # Constant_149__16
%"_val_99__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
205 | # Cast_150__16
%"_val_100__16"<INT64,?> ⬅️ ::Cast(%"_val_99__16") {to=7}
206 | # Constant_151__16
%"_val_101__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
207 | # Reshape_152__16
%"_val_102__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_100__16", %"_val_101__16") {allowzero=0}
208 | # Constant_153__16
%"_val_103__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
209 | # Cast_154__16
%"_val_104__16"<INT64,?> ⬅️ ::Cast(%"_val_103__16") {to=7}
210 | # Constant_155__16
%"_val_105__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
211 | # Reshape_156__16
%"_val_106__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_104__16", %"_val_105__16") {allowzero=0}
212 | # Constant_157__16
%"_val_107__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
213 | # Cast_158__16
%"_val_108__16"<INT64,?> ⬅️ ::Cast(%"_val_107__16") {to=7}
214 | # Constant_159__16
%"_val_109__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
215 | # Reshape_160__16
%"_val_110__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_108__16", %"_val_109__16") {allowzero=0}
216 | # Constant_161__16
%"_val_111__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
217 | # Cast_162__16
%"_val_112__16"<INT64,?> ⬅️ ::Cast(%"_val_111__16") {to=7}
218 | # Constant_163__16
%"_val_113__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
219 | # Reshape_164__16
%"_val_114__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_112__16", %"_val_113__16") {allowzero=0}
220 | # Slice_165__16
%"slice_6__16"<FLOAT16,[unk__59,unk__60,unk__61,unk__62]> ⬅️ ::Slice(%"transpose_1__16", %"_val_102__16", %"_val_106__16", %"_val_110__16", %"_val_114__16")
221 | # n0__47
%"neg_1__16"<FLOAT16,[unk__59,unk__60,unk__61,unk__62]> ⬅️ ::Neg(%"slice_6__16")
222 | # SequenceConstruct_167__16
%"117__16"<Sequence(Tensor(FLOAT16)),[unk__63,unk__64,unk__65,unk__66]> ⬅️ ::SequenceConstruct(%"neg_1__16", %"slice_5__16")
223 | # n0__48
%"cat_1__16"<FLOAT16,[unk__63,unk__64,unk__65,unk__67]> ⬅️ ::ConcatFromSequence(%"117__16") {axis=-1}
224 | # n0__49
%"mul_5__16"<FLOAT16,?> ⬅️ ::Mul(%"cat_1__16", %"unsqueeze_2__16")
225 | # n0__50
%"alpha__50"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
226 | # n1__50
%"alpha_0__50"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__50", %"mul_5__16")
227 | # n2__50
%"other_1__50"<FLOAT16,?> ⬅️ ::Mul(%"mul_5__16", %"alpha_0__50")
228 | # n3__50
%"model_1_1"<FLOAT16,[1,8,128,128]> ⬅️ ::Add(%"mul_4__16", %"other_1__50")
229 | # Constant_171__16
%"_val_121__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
230 | # Cast_172__16
%"_val_122__16"<INT64,?> ⬅️ ::Cast(%"_val_121__16") {to=7}
231 | # Constant_173__16
%"_val_123__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
232 | # Reshape_174__16
%"_val_124__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_122__16", %"_val_123__16") {allowzero=0}
233 | # Constant_175__16
%"_val_125__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
234 | # Cast_176__16
%"_val_126__16"<INT64,?> ⬅️ ::Cast(%"_val_125__16") {to=7}
235 | # Constant_177__16
%"_val_127__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
236 | # Reshape_178__16
%"_val_128__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_126__16", %"_val_127__16") {allowzero=0}
237 | # Constant_179__16
%"_val_129__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
238 | # Cast_180__16
%"_val_130__16"<INT64,?> ⬅️ ::Cast(%"_val_129__16") {to=7}
239 | # Constant_181__16
%"_val_131__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
240 | # Reshape_182__16
%"_val_132__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_130__16", %"_val_131__16") {allowzero=0}
241 | # Constant_183__16
%"_val_133__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
242 | # Cast_184__16
%"_val_134__16"<INT64,?> ⬅️ ::Cast(%"_val_133__16") {to=7}
243 | # Constant_185__16
%"_val_135__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
244 | # Reshape_186__16
%"_val_136__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_134__16", %"_val_135__16") {allowzero=0}
245 | # Slice_187__16
%"slice_7__16"<FLOAT16,[unk__68,unk__69,unk__70,unk__71]> ⬅️ ::Slice(%"model_1_1", %"_val_124__16", %"_val_128__16", %"_val_132__16", %"_val_136__16")
246 | # Constant_188__16
%"_val_138__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
247 | # Cast_189__16
%"_val_139__16"<INT64,?> ⬅️ ::Cast(%"_val_138__16") {to=7}
248 | # Constant_190__16
%"_val_140__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
249 | # Reshape_191__16
%"_val_141__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_139__16", %"_val_140__16") {allowzero=0}
250 | # Constant_192__16
%"_val_142__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
251 | # Cast_193__16
%"_val_143__16"<INT64,?> ⬅️ ::Cast(%"_val_142__16") {to=7}
252 | # Constant_194__16
%"_val_144__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
253 | # Reshape_195__16
%"_val_145__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_143__16", %"_val_144__16") {allowzero=0}
254 | # Constant_196__16
%"_val_146__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
255 | # Cast_197__16
%"_val_147__16"<INT64,?> ⬅️ ::Cast(%"_val_146__16") {to=7}
256 | # Constant_198__16
%"_val_148__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
257 | # Reshape_199__16
%"_val_149__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_147__16", %"_val_148__16") {allowzero=0}
258 | # Constant_200__16
%"_val_150__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
259 | # Cast_201__16
%"_val_151__16"<INT64,?> ⬅️ ::Cast(%"_val_150__16") {to=7}
260 | # Constant_202__16
%"_val_152__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
261 | # Reshape_203__16
%"_val_153__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_151__16", %"_val_152__16") {allowzero=0}
262 | # Slice_204__16
%"slice_8__16"<FLOAT16,[unk__72,unk__73,unk__74,unk__75]> ⬅️ ::Slice(%"slice_7__16", %"_val_141__16", %"_val_145__16", %"_val_149__16", %"_val_153__16")
263 | # n0__51
%"dim__51"<INT64,?> ⬅️ ::Constant() {value_int=2}
264 | # n1__51
%"dim_0__51"<INT64,?> ⬅️ ::Cast(%"dim__51") {to=7}
265 | # n2__51
%"unsqueeze_3__16"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_8__16", %"dim_0__51")
266 | # Constant_206__16
%"_val_156__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
267 | # Cast_207__16
%"_val_157__16"<INT64,?> ⬅️ ::Cast(%"_val_156__16") {to=7}
268 | # Constant_208__16
%"_val_158__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
269 | # Reshape_209__16
%"_val_159__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_157__16", %"_val_158__16") {allowzero=0}
270 | # Constant_210__16
%"_val_160__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
271 | # Cast_211__16
%"_val_161__16"<INT64,?> ⬅️ ::Cast(%"_val_160__16") {to=7}
272 | # Constant_212__16
%"_val_162__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
273 | # Reshape_213__16
%"_val_163__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_161__16", %"_val_162__16") {allowzero=0}
274 | # Constant_214__16
%"_val_164__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
275 | # Cast_215__16
%"_val_165__16"<INT64,?> ⬅️ ::Cast(%"_val_164__16") {to=7}
276 | # Constant_216__16
%"_val_166__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
277 | # Reshape_217__16
%"_val_167__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_165__16", %"_val_166__16") {allowzero=0}
278 | # Constant_218__16
%"_val_168__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
279 | # Cast_219__16
%"_val_169__16"<INT64,?> ⬅️ ::Cast(%"_val_168__16") {to=7}
280 | # Constant_220__16
%"_val_170__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
281 | # Reshape_221__16
%"_val_171__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_169__16", %"_val_170__16") {allowzero=0}
282 | # Slice_222__16
%"slice_9__16"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_3__16", %"_val_159__16", %"_val_163__16", %"_val_167__16", %"_val_171__16")
283 | # Constant_223__16
%"_val_173__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
284 | # Cast_224__16
%"_val_174__16"<INT64,?> ⬅️ ::Cast(%"_val_173__16") {to=7}
285 | # Constant_225__16
%"_val_175__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
286 | # Reshape_226__16
%"_val_176__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_174__16", %"_val_175__16") {allowzero=0}
287 | # Constant_227__16
%"_val_177__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
288 | # Cast_228__16
%"_val_178__16"<INT64,?> ⬅️ ::Cast(%"_val_177__16") {to=7}
289 | # Constant_229__16
%"_val_179__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
290 | # Reshape_230__16
%"_val_180__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_178__16", %"_val_179__16") {allowzero=0}
291 | # Constant_231__16
%"_val_181__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
292 | # Cast_232__16
%"_val_182__16"<INT64,?> ⬅️ ::Cast(%"_val_181__16") {to=7}
293 | # Constant_233__16
%"_val_183__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
294 | # Reshape_234__16
%"_val_184__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_182__16", %"_val_183__16") {allowzero=0}
295 | # Constant_235__16
%"_val_185__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
296 | # Cast_236__16
%"_val_186__16"<INT64,?> ⬅️ ::Cast(%"_val_185__16") {to=7}
297 | # Constant_237__16
%"_val_187__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
298 | # Reshape_238__16
%"_val_188__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_186__16", %"_val_187__16") {allowzero=0}
299 | # Slice_239__16
%"slice_10__16"<FLOAT16,?> ⬅️ ::Slice(%"slice_9__16", %"_val_176__16", %"_val_180__16", %"_val_184__16", %"_val_188__16")
300 | # Constant_240__16
%"_val_190__16"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
301 | # n0__52
%"size_0__52"<INT64,[5]> ⬅️ ::Cast(%"_val_190__16") {to=7}
302 | # n1__52
%"size_1__52"<INT64,[5]> ⬅️ ::Abs(%"size_0__52")
303 | # n2__52
%"expand__16"<FLOAT16,?> ⬅️ ::Expand(%"slice_10__16", %"size_1__52")
304 | # n0__53
%"clone__16"<FLOAT16,?> ⬅️ ::Identity(%"expand__16")
305 | # Constant_243__16
%"_val_193__16"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
306 | # n0__54
%"size_0__54"<INT64,[4]> ⬅️ ::Cast(%"_val_193__16") {to=7}
307 | # n1__54
%"view_10__16"<FLOAT16,[unk__76,unk__77,unk__78,unk__79]> ⬅️ ::Reshape(%"clone__16", %"size_0__54")
308 | # Constant_245__16
%"_val_195__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
309 | # Cast_246__16
%"_val_196__16"<INT64,?> ⬅️ ::Cast(%"_val_195__16") {to=7}
310 | # Constant_247__16
%"_val_197__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
311 | # Reshape_248__16
%"_val_198__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_196__16", %"_val_197__16") {allowzero=0}
312 | # Constant_249__16
%"_val_199__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
313 | # Cast_250__16
%"_val_200__16"<INT64,?> ⬅️ ::Cast(%"_val_199__16") {to=7}
314 | # Constant_251__16
%"_val_201__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
315 | # Reshape_252__16
%"_val_202__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_200__16", %"_val_201__16") {allowzero=0}
316 | # Constant_253__16
%"_val_203__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
317 | # Cast_254__16
%"_val_204__16"<INT64,?> ⬅️ ::Cast(%"_val_203__16") {to=7}
318 | # Constant_255__16
%"_val_205__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
319 | # Reshape_256__16
%"_val_206__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_204__16", %"_val_205__16") {allowzero=0}
320 | # Constant_257__16
%"_val_207__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
321 | # Cast_258__16
%"_val_208__16"<INT64,?> ⬅️ ::Cast(%"_val_207__16") {to=7}
322 | # Constant_259__16
%"_val_209__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
323 | # Reshape_260__16
%"_val_210__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_208__16", %"_val_209__16") {allowzero=0}
324 | # Slice_261__16
%"slice_11__16"<FLOAT16,[unk__80,unk__81,unk__82,unk__83]> ⬅️ ::Slice(%"model_1", %"_val_198__16", %"_val_202__16", %"_val_206__16", %"_val_210__16")
325 | # Constant_262__16
%"_val_212__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
326 | # Cast_263__16
%"_val_213__16"<INT64,?> ⬅️ ::Cast(%"_val_212__16") {to=7}
327 | # Constant_264__16
%"_val_214__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
328 | # Reshape_265__16
%"_val_215__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_213__16", %"_val_214__16") {allowzero=0}
329 | # Constant_266__16
%"_val_216__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
330 | # Cast_267__16
%"_val_217__16"<INT64,?> ⬅️ ::Cast(%"_val_216__16") {to=7}
331 | # Constant_268__16
%"_val_218__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
332 | # Reshape_269__16
%"_val_219__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_217__16", %"_val_218__16") {allowzero=0}
333 | # Constant_270__16
%"_val_220__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
334 | # Cast_271__16
%"_val_221__16"<INT64,?> ⬅️ ::Cast(%"_val_220__16") {to=7}
335 | # Constant_272__16
%"_val_222__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
336 | # Reshape_273__16
%"_val_223__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_221__16", %"_val_222__16") {allowzero=0}
337 | # Constant_274__16
%"_val_224__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
338 | # Cast_275__16
%"_val_225__16"<INT64,?> ⬅️ ::Cast(%"_val_224__16") {to=7}
339 | # Constant_276__16
%"_val_226__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
340 | # Reshape_277__16
%"_val_227__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_225__16", %"_val_226__16") {allowzero=0}
341 | # Slice_278__16
%"slice_12__16"<FLOAT16,[unk__84,unk__85,unk__86,unk__87]> ⬅️ ::Slice(%"slice_11__16", %"_val_215__16", %"_val_219__16", %"_val_223__16", %"_val_227__16")
342 | # n0__55
%"dim__55"<INT64,?> ⬅️ ::Constant() {value_int=2}
343 | # n1__55
%"dim_0__55"<INT64,?> ⬅️ ::Cast(%"dim__55") {to=7}
344 | # n2__55
%"unsqueeze_4__16"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_12__16", %"dim_0__55")
345 | # Constant_280__16
%"_val_230__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
346 | # Cast_281__16
%"_val_231__16"<INT64,?> ⬅️ ::Cast(%"_val_230__16") {to=7}
347 | # Constant_282__16
%"_val_232__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
348 | # Reshape_283__16
%"_val_233__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_231__16", %"_val_232__16") {allowzero=0}
349 | # Constant_284__16
%"_val_234__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
350 | # Cast_285__16
%"_val_235__16"<INT64,?> ⬅️ ::Cast(%"_val_234__16") {to=7}
351 | # Constant_286__16
%"_val_236__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
352 | # Reshape_287__16
%"_val_237__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_235__16", %"_val_236__16") {allowzero=0}
353 | # Constant_288__16
%"_val_238__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
354 | # Cast_289__16
%"_val_239__16"<INT64,?> ⬅️ ::Cast(%"_val_238__16") {to=7}
355 | # Constant_290__16
%"_val_240__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
356 | # Reshape_291__16
%"_val_241__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_239__16", %"_val_240__16") {allowzero=0}
357 | # Constant_292__16
%"_val_242__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
358 | # Cast_293__16
%"_val_243__16"<INT64,?> ⬅️ ::Cast(%"_val_242__16") {to=7}
359 | # Constant_294__16
%"_val_244__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
360 | # Reshape_295__16
%"_val_245__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_243__16", %"_val_244__16") {allowzero=0}
361 | # Slice_296__16
%"slice_13__16"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_4__16", %"_val_233__16", %"_val_237__16", %"_val_241__16", %"_val_245__16")
362 | # Constant_297__16
%"_val_247__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
363 | # Cast_298__16
%"_val_248__16"<INT64,?> ⬅️ ::Cast(%"_val_247__16") {to=7}
364 | # Constant_299__16
%"_val_249__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
365 | # Reshape_300__16
%"_val_250__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_248__16", %"_val_249__16") {allowzero=0}
366 | # Constant_301__16
%"_val_251__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
367 | # Cast_302__16
%"_val_252__16"<INT64,?> ⬅️ ::Cast(%"_val_251__16") {to=7}
368 | # Constant_303__16
%"_val_253__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
369 | # Reshape_304__16
%"_val_254__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_252__16", %"_val_253__16") {allowzero=0}
370 | # Constant_305__16
%"_val_255__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
371 | # Cast_306__16
%"_val_256__16"<INT64,?> ⬅️ ::Cast(%"_val_255__16") {to=7}
372 | # Constant_307__16
%"_val_257__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
373 | # Reshape_308__16
%"_val_258__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_256__16", %"_val_257__16") {allowzero=0}
374 | # Constant_309__16
%"_val_259__16"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
375 | # Cast_310__16
%"_val_260__16"<INT64,?> ⬅️ ::Cast(%"_val_259__16") {to=7}
376 | # Constant_311__16
%"_val_261__16"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
377 | # Reshape_312__16
%"_val_262__16"<INT64,[1]> ⬅️ ::Reshape(%"_val_260__16", %"_val_261__16") {allowzero=0}
378 | # Slice_313__16
%"slice_14__16"<FLOAT16,?> ⬅️ ::Slice(%"slice_13__16", %"_val_250__16", %"_val_254__16", %"_val_258__16", %"_val_262__16")
379 | # Constant_314__16
%"_val_264__16"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
380 | # n0__56
%"size_0__56"<INT64,[5]> ⬅️ ::Cast(%"_val_264__16") {to=7}
381 | # n1__56
%"size_1__56"<INT64,[5]> ⬅️ ::Abs(%"size_0__56")
382 | # n2__56
%"expand_1__16"<FLOAT16,?> ⬅️ ::Expand(%"slice_14__16", %"size_1__56")
383 | # n0__57
%"clone_1__16"<FLOAT16,?> ⬅️ ::Identity(%"expand_1__16")
384 | # Constant_317__16
%"_val_267__16"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
385 | # n0__58
%"size_0__58"<INT64,[4]> ⬅️ ::Cast(%"_val_267__16") {to=7}
386 | # n1__58
%"view_11__16"<FLOAT16,[unk__88,unk__89,unk__90,unk__91]> ⬅️ ::Reshape(%"clone_1__16", %"size_0__58")
387 | # n0__59
%"tmp__59"<INT64,[unk__92]> ⬅️ ::Shape(%"add_1__16")
388 | # n1__59
%"int64_m1__59"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_m1')}
389 | # n2__59
%"tmp_subscripted__59"<INT64,?> ⬅️ ::Gather(%"tmp__59", %"int64_m1__59") {axis=0}
390 | # n3__59
%"embedding_size__59"<FLOAT16,?> ⬅️ ::CastLike(%"tmp_subscripted__59", %"add_1__16")
391 | # n4__59
%"const__59"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
392 | # n5__59
%"tmp_0__59"<FLOAT16,?> ⬅️ ::Sqrt(%"embedding_size__59")
393 | # n6__59
%"const_cast__59"<FLOAT16,?> ⬅️ ::CastLike(%"const__59", %"tmp_0__59")
394 | # n7__59
%"_val_269__16"<FLOAT16,?> ⬅️ ::Div(%"const_cast__59", %"tmp_0__59")
395 | # CastLike_320__16
%"_val_270__16"<FLOAT16,?> ⬅️ ::CastLike(%"_val_269__16", %"add_1__16")
396 | # n0__60
%"tmp__60"<INT64,[unk__93]> ⬅️ ::Shape(%"add_1__16")
397 | # n1__60
%"int64_0_1d__60"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
398 | # n2__60
%"int64_1_1d__60"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
399 | # n3__60
%"int64_m2_1d__60"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
400 | # n4__60
%"int64_m1_1d__60"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
401 | # n5__60
%"target_length__60"<INT64,[unk__94]> ⬅️ ::Slice(%"tmp__60", %"int64_m2_1d__60", %"int64_m1_1d__60", %"int64_0_1d__60", %"int64_1_1d__60")
402 | # n6__60
%"tmp_0__60"<INT64,[4]> ⬅️ ::Shape(%"view_10__16")
403 | # n7__60
%"int64_0_1d_1__60"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
404 | # n8__60
%"int64_1_1d_2__60"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
405 | # n9__60
%"int64_m2_1d_3__60"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_3')}
406 | # n10__60
%"int64_m1_1d_4__60"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_4')}
407 | # n11__60
%"source_length__60"<INT64,[1]> ⬅️ ::Slice(%"tmp_0__60", %"int64_m2_1d_3__60", %"int64_m1_1d_4__60", %"int64_0_1d_1__60", %"int64_1_1d_2__60")
408 | # n12__60
%"size__60"<INT64,[unk__95]> ⬅️ ::Concat(%"target_length__60", %"source_length__60") {axis=0}
409 | # n13__60
%"const__60"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
410 | # n14__60
%"attn_mask__60"<FLOAT,?> ⬅️ ::Expand(%"const__60", %"size__60")
411 | # n15__60
%"attn_mask_5__60"<FLOAT,?> ⬅️ ::Trilu(%"attn_mask__60") {upper=0}
412 | # n16__60
%"const_6__60"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_6')}
413 | # n17__60
%"const_6_cast__60"<FLOAT,?> ⬅️ ::CastLike(%"const_6__60", %"attn_mask_5__60")
414 | # n18__60
%"tmp_7__60"<BOOL,?> ⬅️ ::Equal(%"attn_mask_5__60", %"const_6_cast__60")
415 | # n19__60
%"tmp_8__60"<FLOAT,?> ⬅️ ::Constant() {value_float=-inf}
416 | # n20__60
%"const_9__60"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
417 | # n21__60
%"const_9_cast__60"<FLOAT,?> ⬅️ ::CastLike(%"const_9__60", %"tmp_8__60")
418 | # n22__60
%"attn_mask_10__60"<FLOAT,?> ⬅️ ::Where(%"tmp_7__60", %"tmp_8__60", %"const_9_cast__60")
419 | # n23__60
%"_val_271__16"<FLOAT16,?> ⬅️ ::CastLike(%"attn_mask_10__60", %"add_1__16")
420 | # n0__61
%"key_shape__61"<INT64,[4]> ⬅️ ::Shape(%"view_10__16")
421 | # n1__61
%"int64_0_1d__61"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
422 | # n2__61
%"int64_1_1d__61"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
423 | # n3__61
%"int64_m1_1d__61"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
424 | # n4__61
%"int64_9223372036854775807_1d__61"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_9223372036854775807_1d')}
425 | # n5__61
%"key_last_dim__61"<INT64,[1]> ⬅️ ::Slice(%"key_shape__61", %"int64_m1_1d__61", %"int64_9223372036854775807_1d__61", %"int64_0_1d__61", %"int64_1_1d__61")
426 | # n6__61
%"int64_0_1d_0__61"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_0')}
427 | # n7__61
%"int64_1_1d_1__61"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_1')}
428 | # n8__61
%"int64_m2_1d__61"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
429 | # n9__61
%"int64_m1_1d_2__61"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_2')}
430 | # n10__61
%"key_second_last_dim__61"<INT64,[1]> ⬅️ ::Slice(%"key_shape__61", %"int64_m2_1d__61", %"int64_m1_1d_2__61", %"int64_0_1d_0__61", %"int64_1_1d_1__61")
431 | # n11__61
%"int64_0_1d_3__61"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
432 | # n12__61
%"int64_1_1d_4__61"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
433 | # n13__61
%"int64_m2_1d_5__61"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_5')}
434 | # n14__61
%"key_first_dims__61"<INT64,[2]> ⬅️ ::Slice(%"key_shape__61", %"int64_0_1d_3__61", %"int64_m2_1d_5__61", %"int64_0_1d_3__61", %"int64_1_1d_4__61")
435 | # n15__61
%"tmp__61"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
436 | # n16__61
%"key_squeezed_shape__61"<INT64,[3]> ⬅️ ::Concat(%"tmp__61", %"key_second_last_dim__61", %"key_last_dim__61") {axis=0}
437 | # n17__61
%"key_squeezed__61"<FLOAT16,[unk__96,unk__97,unk__98]> ⬅️ ::Reshape(%"view_10__16", %"key_squeezed_shape__61")
438 | # n18__61
%"key_squeezed_transposed__61"<FLOAT16,[unk__96,unk__98,unk__97]> ⬅️ ::Transpose(%"key_squeezed__61") {perm=[0, 2, 1]}
439 | # n19__61
%"key_transposed_shape__61"<INT64,[4]> ⬅️ ::Concat(%"key_first_dims__61", %"key_last_dim__61", %"key_second_last_dim__61") {axis=0}
440 | # n20__61
%"key_transposed__61"<FLOAT16,[unk__99,unk__100,unk__101,unk__102]> ⬅️ ::Reshape(%"key_squeezed_transposed__61", %"key_transposed_shape__61")
441 | # n21__61
%"tmp_6__61"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__16")
442 | # n22__61
%"query_scaled__61"<FLOAT16,?> ⬅️ ::Mul(%"add_1__16", %"tmp_6__61")
443 | # n23__61
%"tmp_7__61"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__16")
444 | # n24__61
%"key_transposed_scaled__61"<FLOAT16,[unk__99,unk__100,unk__101,unk__102]> ⬅️ ::Mul(%"key_transposed__61", %"tmp_7__61")
445 | # n25__61
%"tmp_8__61"<FLOAT16,?> ⬅️ ::MatMul(%"query_scaled__61", %"key_transposed_scaled__61")
446 | # n26__61
%"tmp_9__61"<FLOAT16,?> ⬅️ ::Add(%"tmp_8__61", %"_val_271__16")
447 | # n27__61
%"attn_weight__61"<FLOAT16,?> ⬅️ ::Softmax(%"tmp_9__61") {axis=-1}
448 | # n28__61
%"dropout_p__61"<FLOAT,?> ⬅️ ::Constant() {value_float=0.0}
449 | # n29__61
%"attn_weight_10__61"<FLOAT16,?>, %"___61"<BOOL,?> ⬅️ ::Dropout(%"attn_weight__61", %"dropout_p__61")
450 | # n30__61
%"_scaled_dot_product_efficient_attention__16"<FLOAT16,?> ⬅️ ::MatMul(%"attn_weight_10__61", %"view_11__16")
451 | # n0__62
%"query_0__62"<FLOAT16,?> ⬅️ ::Transpose(%"add_1__16") {perm=[0, 2, 1, 3]}
452 | # n1__62
%"query_shape__62"<INT64,[unk__103]> ⬅️ ::Shape(%"query_0__62")
453 | # n2__62
%"int64_0_1d__62"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
454 | # n3__62
%"int64_1_1d__62"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
455 | # n4__62
%"query_first_dims__62"<INT64,[unk__104]> ⬅️ ::Slice(%"query_shape__62", %"int64_0_1d__62", %"int64_1_1d__62", %"int64_0_1d__62", %"int64_1_1d__62")
456 | # n5__62
%"int64_0_1d_1__62"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
457 | # n6__62
%"int64_1_1d_2__62"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
458 | # n7__62
%"int64_2_1d__62"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_2_1d')}
459 | # n8__62
%"query_second_dims__62"<INT64,[unk__105]> ⬅️ ::Slice(%"query_shape__62", %"int64_1_1d_2__62", %"int64_2_1d__62", %"int64_0_1d_1__62", %"int64_1_1d_2__62")
460 | # n9__62
%"int64_0_1d_3__62"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
461 | # n10__62
%"int64_1_1d_4__62"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
462 | # n11__62
%"int64_m2_1d__62"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
463 | # n12__62
%"int64_m1_1d__62"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
464 | # n13__62
%"num_heads__62"<INT64,[unk__106]> ⬅️ ::Slice(%"query_shape__62", %"int64_m2_1d__62", %"int64_m1_1d__62", %"int64_0_1d_3__62", %"int64_1_1d_4__62")
465 | # n14__62
%"compute_log_sumexp__62"<INT64,?> ⬅️ ::Constant() {value_int=0}
466 | # n15__62
%"compute_log_sumexp_as_bool__62"<BOOL,?> ⬅️ ::Cast(%"compute_log_sumexp__62") {to=9}
467 | # n16__62
%"_scaled_dot_product_efficient_attention_1__16"<FLOAT,?> ⬅️ ::If(%"compute_log_sumexp_as_bool__62") {then_branch=
graph(
name=thenGraph_14,
inputs=(
),
outputs=(
%"logsum_exp__62"<FLOAT,?>
),
) {
0 | # n0__62_12
%"tmp__62"<FLOAT,[unk__105]> ⬅️ ::Cast(%"query_second_dims__62") {to=1}
1 | # n1__62_13
%"const__62"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2 | # n2__62_14
%"const_cast__62"<FLOAT,?> ⬅️ ::CastLike(%"const__62", %"tmp__62")
3 | # n3__62_15
%"tmp_5__62"<FLOAT,[unk__105]> ⬅️ ::Div(%"tmp__62", %"const_cast__62")
4 | # n4__62_16
%"tmp_6__62"<FLOAT,[unk__105]> ⬅️ ::Ceil(%"tmp_5__62")
5 | # n5__62_17
%"const_7__62"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_7')}
6 | # n6__62_18
%"const_7_cast__62"<FLOAT,?> ⬅️ ::CastLike(%"const_7__62", %"tmp_6__62")
7 | # n7__62_19
%"tmp_8__62"<FLOAT,[unk__105]> ⬅️ ::Mul(%"tmp_6__62", %"const_7_cast__62")
8 | # n8__62_20
%"logsumexp_dim__62"<INT64,[unk__105]> ⬅️ ::Cast(%"tmp_8__62") {to=7}
9 | # n9__62_21
%"const_9__62"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
10 | # n10__62_22
%"tmp_10__62"<INT64,[unk__107]> ⬅️ ::Concat(%"query_first_dims__62", %"num_heads__62", %"logsumexp_dim__62") {axis=0}
11 | # n11__62_23
%"logsum_exp__62"<FLOAT,?> ⬅️ ::Expand(%"const_9__62", %"tmp_10__62")
return %"logsum_exp__62"<FLOAT,?>
}, else_branch=
graph(
name=elseGraph_14,
inputs=(
),
outputs=(
%"logsum_exp_14__62"<FLOAT,?>
),
) {
0 | # n0__62_24
%"const_11__62"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_11')}
1 | # n1__62_25
%"int64_0_1d_12__62"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_12')}
2 | # n2__62_26
%"int64_0_1d_12_cast__62"<INT64,[1]> ⬅️ ::CastLike(%"int64_0_1d_12__62", %"num_heads__62")
3 | # n3__62_27
%"tmp_13__62"<INT64,[unk__108]> ⬅️ ::Concat(%"query_first_dims__62", %"num_heads__62", %"int64_0_1d_12_cast__62") {axis=0}
4 | # n4__62_28
%"logsum_exp_14__62"<FLOAT,?> ⬅️ ::Expand(%"const_11__62", %"tmp_13__62")
return %"logsum_exp_14__62"<FLOAT,?>
}}
468 | # n17__62
%"tmp_16__62"<INT64,[0]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[0]>('Empty_INTS')}
469 | # n18__62
%"tmp_17__62"<FLOAT,?> ⬅️ ::ConstantOfShape(%"tmp_16__62")
470 | # n19__62
%"_scaled_dot_product_efficient_attention_3__16"<INT64,?> ⬅️ ::Cast(%"tmp_17__62") {to=7}
471 | # Transpose_324__16
%"transpose_3__16"<FLOAT16,?> ⬅️ ::Transpose(%"_scaled_dot_product_efficient_attention__16") {perm=[0, 2, 1, 3]}
472 | # Constant_325__16
%"_val_276__16"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
473 | # n0__63
%"size_0__63"<INT64,[3]> ⬅️ ::Cast(%"_val_276__16") {to=7}
474 | # n1__63
%"view_12__16"<FLOAT16,[unk__109,unk__110,unk__111]> ⬅️ ::Reshape(%"transpose_3__16", %"size_0__63")
475 | # n0__66
%"tmp__66"<INT64,[2]> ⬅️ ::Shape(%"model.layers.0.self_attn.o_proj.weight")
476 | # n1__66
%"rank__65"<INT64,?> ⬅️ ::Size(%"tmp__66")
477 | # n1__65
%"int64_2__65"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
478 | # n2__65
%"int64_2_cast__65"<INT64,?> ⬅️ ::CastLike(%"int64_2__65", %"rank__65")
479 | # n3__65
%"cond__65"<BOOL,?> ⬅️ ::Equal(%"rank__65", %"int64_2_cast__65")
480 | # n4__65
%"t_3__64"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__65") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__65"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__65_29
%"result__65"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.0.self_attn.o_proj.weight") {perm=[1, 0]}
return %"result__65"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__65"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__65_30
%"result_0__65"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.0.self_attn.o_proj.weight")
return %"result_0__65"<FLOAT16,[4096,4096]>
}}
481 | # Constant_3__64
%"_val_3__64"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
482 | # n0__67
%"size_0__67"<INT64,[2]> ⬅️ ::Cast(%"_val_3__64") {to=7}
483 | # n1__67
%"view_13__64"<FLOAT16,[unk__112,unk__113]> ⬅️ ::Reshape(%"view_12__16", %"size_0__67")
484 | # n0__68
%"mm_3__64"<FLOAT16,[unk__112,4096]> ⬅️ ::MatMul(%"view_13__64", %"t_3__64")
485 | # Constant_6__64
%"_val_6__64"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
486 | # n0__69
%"size_0__69"<INT64,[3]> ⬅️ ::Cast(%"_val_6__64") {to=7}
487 | # n1__69
%"model_layers_0_self_attn_1_2__6"<FLOAT16,[unk__114,unk__115,unk__116]> ⬅️ ::Reshape(%"mm_3__64", %"size_0__69")
488 | # n0__70
%"alpha__70"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
489 | # n1__70
%"alpha_0__70"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__70", %"model_layers_0_self_attn_1_2__6")
490 | # n2__70
%"other_1__70"<FLOAT16,[unk__114,unk__115,unk__116]> ⬅️ ::Mul(%"model_layers_0_self_attn_1_2__6", %"alpha_0__70")
491 | # n3__70
%"add_3__6"<FLOAT16,[unk__114,128,4096]> ⬅️ ::Add(%"model_embed_tokens_1__1", %"other_1__70")
492 | # Cast_3__71
%"_to_copy_2__71"<FLOAT,[unk__114,128,4096]> ⬅️ ::Cast(%"add_3__6") {to=1}
493 | # Constant_4__71
%"_val_2__71"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
494 | # Cast_5__71
%"scalar_tensor_default_1__71"<FLOAT,?> ⬅️ ::Cast(%"_val_2__71") {to=1}
495 | # n0__72
%"pow_2__71"<FLOAT,[unk__114,128,4096]> ⬅️ ::Pow(%"_to_copy_2__71", %"scalar_tensor_default_1__71")
496 | # Constant_7__71
%"_val_5__71"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
497 | # n0__74
%"tmp__74"<INT64,[3]> ⬅️ ::Shape(%"pow_2__71")
498 | # n1__74
%"tmp_0__74"<INT64,?> ⬅️ ::Size(%"tmp__74")
499 | # n2__74
%"tmp_1__74"<INT64,?> ⬅️ ::Constant() {value_int=0}
500 | # n3__74
%"cond__73"<BOOL,?> ⬅️ ::Equal(%"tmp_0__74", %"tmp_1__74")
501 | # n1__73
%"mean_1__71"<FLOAT,?> ⬅️ ::If(%"cond__73") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__73"<FLOAT,[unk__114,128,4096]>
),
) {
0 | # n0__73_31
%"result__73"<FLOAT,[unk__114,128,4096]> ⬅️ ::Identity(%"pow_2__71")
return %"result__73"<FLOAT,[unk__114,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__73"<FLOAT,?>
),
) {
0 | # n0__75
%"tmp__75"<INT64,[1]> ⬅️ ::Shape(%"_val_5__71")
1 | # n1__75
%"tmp_0__75"<INT64,?> ⬅️ ::Size(%"tmp__75")
2 | # n2__75
%"tmp_1__75"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__75
%"cond_0__73"<BOOL,?> ⬅️ ::Equal(%"tmp_0__75", %"tmp_1__75")
4 | # n1__73_33
%"dim_3__73"<INT64,?> ⬅️ ::If(%"cond_0__73") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__73"<INT64,[1,1]>
),
) {
0 | # n0__73_34
%"int64_0__73"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__73_35
%"dim_1__73"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__71", %"int64_0__73")
return %"dim_1__73"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__73"<INT64,[1]>
),
) {
0 | # n0__73_36
%"dim_2__73"<INT64,[1]> ⬅️ ::Identity(%"_val_5__71")
return %"dim_2__73"<INT64,[1]>
}}
5 | # n2__73
%"result_4__73"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_2__71", %"dim_3__73") {keepdims=1}
return %"result_4__73"<FLOAT,?>
}}
502 | # Constant_9__71
%"_val_7__71"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
503 | # n0__76
%"alpha__76"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
504 | # n1__76
%"alpha_0__76"<FLOAT,?> ⬅️ ::CastLike(%"alpha__76", %"_val_7__71")
505 | # n2__76
%"other_1__76"<FLOAT,?> ⬅️ ::Mul(%"_val_7__71", %"alpha_0__76")
506 | # n3__76
%"add_4__71"<FLOAT,?> ⬅️ ::Add(%"mean_1__71", %"other_1__76")
507 | # n0__77
%"tmp__77"<FLOAT,?> ⬅️ ::Sqrt(%"add_4__71")
508 | # n1__77
%"rsqrt_1__71"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__77")
509 | # n0__78
%"mul_6__71"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_2__71", %"rsqrt_1__71")
510 | # Cast_13__71
%"_to_copy_3__71"<FLOAT16,?> ⬅️ ::Cast(%"mul_6__71") {to=10}
511 | # n0__79
%"model_layers_0_post_attention_layernorm_1__6"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.0.post_attention_layernorm.weight", %"_to_copy_3__71")
512 | # n0__83
%"tmp__83"<INT64,[2]> ⬅️ ::Shape(%"model.layers.0.mlp.gate_proj.weight")
513 | # n1__83
%"rank__82"<INT64,?> ⬅️ ::Size(%"tmp__83")
514 | # n1__82
%"int64_2__82"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
515 | # n2__82
%"int64_2_cast__82"<INT64,?> ⬅️ ::CastLike(%"int64_2__82", %"rank__82")
516 | # n3__82
%"cond__82"<BOOL,?> ⬅️ ::Equal(%"rank__82", %"int64_2_cast__82")
517 | # n4__82
%"t_4__81"<FLOAT16,[unk__117,unk__118]> ⬅️ ::If(%"cond__82") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__82"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__82_37
%"result__82"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.0.mlp.gate_proj.weight") {perm=[1, 0]}
return %"result__82"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__82"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__82_38
%"result_0__82"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.0.mlp.gate_proj.weight")
return %"result_0__82"<FLOAT16,[14336,4096]>
}}
518 | # Constant_3__81
%"_val_3__81"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
519 | # n0__84
%"size_0__84"<INT64,[2]> ⬅️ ::Cast(%"_val_3__81") {to=7}
520 | # n1__84
%"view_15__81"<FLOAT16,[unk__119,unk__120]> ⬅️ ::Reshape(%"model_layers_0_post_attention_layernorm_1__6", %"size_0__84")
521 | # n0__85
%"mm_4__81"<FLOAT16,[unk__119,unk__118]> ⬅️ ::MatMul(%"view_15__81", %"t_4__81")
522 | # Constant_6__81
%"_val_6__81"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
523 | # n0__86
%"size_0__86"<INT64,[3]> ⬅️ ::Cast(%"_val_6__81") {to=7}
524 | # n1__86
%"model_layers_0_mlp_gate_proj_1__80"<FLOAT16,[unk__121,unk__122,unk__123]> ⬅️ ::Reshape(%"mm_4__81", %"size_0__86")
525 | # Cast_0__87
%"_to_copy_4__87"<FLOAT,[unk__121,unk__122,unk__123]> ⬅️ ::Cast(%"model_layers_0_mlp_gate_proj_1__80") {to=1}
526 | # n0__88
%"sigmoid__87"<FLOAT,[unk__121,unk__122,unk__123]> ⬅️ ::Sigmoid(%"_to_copy_4__87")
527 | # n0__89
%"mul_8__87"<FLOAT,[unk__121,unk__122,unk__123]> ⬅️ ::Mul(%"_to_copy_4__87", %"sigmoid__87")
528 | # Cast_3__87
%"model_layers_0_mlp_act_fn_1__80"<FLOAT16,[unk__121,unk__122,unk__123]> ⬅️ ::Cast(%"mul_8__87") {to=10}
529 | # n0__92
%"tmp__92"<INT64,[2]> ⬅️ ::Shape(%"model.layers.0.mlp.up_proj.weight")
530 | # n1__92
%"rank__91"<INT64,?> ⬅️ ::Size(%"tmp__92")
531 | # n1__91
%"int64_2__91"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
532 | # n2__91
%"int64_2_cast__91"<INT64,?> ⬅️ ::CastLike(%"int64_2__91", %"rank__91")
533 | # n3__91
%"cond__91"<BOOL,?> ⬅️ ::Equal(%"rank__91", %"int64_2_cast__91")
534 | # n4__91
%"t_5__90"<FLOAT16,[unk__124,unk__125]> ⬅️ ::If(%"cond__91") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__91"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__91_39
%"result__91"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.0.mlp.up_proj.weight") {perm=[1, 0]}
return %"result__91"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__91"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__91_40
%"result_0__91"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.0.mlp.up_proj.weight")
return %"result_0__91"<FLOAT16,[14336,4096]>
}}
535 | # Constant_3__90
%"_val_3__90"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
536 | # n0__93
%"size_0__93"<INT64,[2]> ⬅️ ::Cast(%"_val_3__90") {to=7}
537 | # n1__93
%"view_17__90"<FLOAT16,[unk__126,unk__127]> ⬅️ ::Reshape(%"model_layers_0_post_attention_layernorm_1__6", %"size_0__93")
538 | # n0__94
%"mm_5__90"<FLOAT16,[unk__126,unk__125]> ⬅️ ::MatMul(%"view_17__90", %"t_5__90")
539 | # Constant_6__90
%"_val_6__90"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
540 | # n0__95
%"size_0__95"<INT64,[3]> ⬅️ ::Cast(%"_val_6__90") {to=7}
541 | # n1__95
%"model_layers_0_mlp_up_proj_1__80"<FLOAT16,[unk__128,unk__129,unk__130]> ⬅️ ::Reshape(%"mm_5__90", %"size_0__95")
542 | # n0__96
%"mul_9__80"<FLOAT16,[unk__131,unk__132,unk__133]> ⬅️ ::Mul(%"model_layers_0_mlp_act_fn_1__80", %"model_layers_0_mlp_up_proj_1__80")
543 | # n0__99
%"tmp__99"<INT64,[2]> ⬅️ ::Shape(%"model.layers.0.mlp.down_proj.weight")
544 | # n1__99
%"rank__98"<INT64,?> ⬅️ ::Size(%"tmp__99")
545 | # n1__98
%"int64_2__98"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
546 | # n2__98
%"int64_2_cast__98"<INT64,?> ⬅️ ::CastLike(%"int64_2__98", %"rank__98")
547 | # n3__98
%"cond__98"<BOOL,?> ⬅️ ::Equal(%"rank__98", %"int64_2_cast__98")
548 | # n4__98
%"t_6__97"<FLOAT16,[unk__134,unk__135]> ⬅️ ::If(%"cond__98") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__98"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__98_41
%"result__98"<FLOAT16,[14336,4096]> ⬅️ ::Transpose(%"model.layers.0.mlp.down_proj.weight") {perm=[1, 0]}
return %"result__98"<FLOAT16,[14336,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__98"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__98_42
%"result_0__98"<FLOAT16,[4096,14336]> ⬅️ ::Identity(%"model.layers.0.mlp.down_proj.weight")
return %"result_0__98"<FLOAT16,[4096,14336]>
}}
549 | # Constant_3__97
%"_val_3__97"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
550 | # n0__100
%"size_0__100"<INT64,[2]> ⬅️ ::Cast(%"_val_3__97") {to=7}
551 | # n1__100
%"view_19__97"<FLOAT16,[unk__136,unk__137]> ⬅️ ::Reshape(%"mul_9__80", %"size_0__100")
552 | # n0__101
%"mm_6__97"<FLOAT16,[unk__136,unk__135]> ⬅️ ::MatMul(%"view_19__97", %"t_6__97")
553 | # Constant_6__97
%"_val_6__97"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
554 | # n0__102
%"size_0__102"<INT64,[3]> ⬅️ ::Cast(%"_val_6__97") {to=7}
555 | # n1__102
%"model_layers_0_mlp_1__6"<FLOAT16,[unk__138,unk__139,unk__140]> ⬅️ ::Reshape(%"mm_6__97", %"size_0__102")
556 | # n0__103
%"alpha__103"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
557 | # n1__103
%"alpha_0__103"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__103", %"model_layers_0_mlp_1__6")
558 | # n2__103
%"other_1__103"<FLOAT16,[unk__138,unk__139,unk__140]> ⬅️ ::Mul(%"model_layers_0_mlp_1__6", %"alpha_0__103")
559 | # n3__103
%"model_layers_0_1_2__1"<FLOAT16,[unk__141,128,4096]> ⬅️ ::Add(%"add_3__6", %"other_1__103")
560 | # Cast_3__105
%"_to_copy_6__105"<FLOAT,[unk__141,128,4096]> ⬅️ ::Cast(%"model_layers_0_1_2__1") {to=1}
561 | # Constant_4__105
%"_val_2__105"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
562 | # Cast_5__105
%"scalar_tensor_default_2__105"<FLOAT,?> ⬅️ ::Cast(%"_val_2__105") {to=1}
563 | # n0__106
%"pow_3__105"<FLOAT,[unk__141,128,4096]> ⬅️ ::Pow(%"_to_copy_6__105", %"scalar_tensor_default_2__105")
564 | # Constant_7__105
%"_val_5__105"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
565 | # n0__108
%"tmp__108"<INT64,[3]> ⬅️ ::Shape(%"pow_3__105")
566 | # n1__108
%"tmp_0__108"<INT64,?> ⬅️ ::Size(%"tmp__108")
567 | # n2__108
%"tmp_1__108"<INT64,?> ⬅️ ::Constant() {value_int=0}
568 | # n3__108
%"cond__107"<BOOL,?> ⬅️ ::Equal(%"tmp_0__108", %"tmp_1__108")
569 | # n1__107
%"mean_2__105"<FLOAT,?> ⬅️ ::If(%"cond__107") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__107"<FLOAT,[unk__141,128,4096]>
),
) {
0 | # n0__107_43
%"result__107"<FLOAT,[unk__141,128,4096]> ⬅️ ::Identity(%"pow_3__105")
return %"result__107"<FLOAT,[unk__141,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__107"<FLOAT,?>
),
) {
0 | # n0__109
%"tmp__109"<INT64,[1]> ⬅️ ::Shape(%"_val_5__105")
1 | # n1__109
%"tmp_0__109"<INT64,?> ⬅️ ::Size(%"tmp__109")
2 | # n2__109
%"tmp_1__109"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__109
%"cond_0__107"<BOOL,?> ⬅️ ::Equal(%"tmp_0__109", %"tmp_1__109")
4 | # n1__107_45
%"dim_3__107"<INT64,?> ⬅️ ::If(%"cond_0__107") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__107"<INT64,[1,1]>
),
) {
0 | # n0__107_46
%"int64_0__107"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__107_47
%"dim_1__107"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__105", %"int64_0__107")
return %"dim_1__107"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__107"<INT64,[1]>
),
) {
0 | # n0__107_48
%"dim_2__107"<INT64,[1]> ⬅️ ::Identity(%"_val_5__105")
return %"dim_2__107"<INT64,[1]>
}}
5 | # n2__107
%"result_4__107"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_3__105", %"dim_3__107") {keepdims=1}
return %"result_4__107"<FLOAT,?>
}}
570 | # Constant_9__105
%"_val_7__105"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
571 | # n0__110
%"alpha__110"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
572 | # n1__110
%"alpha_0__110"<FLOAT,?> ⬅️ ::CastLike(%"alpha__110", %"_val_7__105")
573 | # n2__110
%"other_1__110"<FLOAT,?> ⬅️ ::Mul(%"_val_7__105", %"alpha_0__110")
574 | # n3__110
%"add_6__105"<FLOAT,?> ⬅️ ::Add(%"mean_2__105", %"other_1__110")
575 | # n0__111
%"tmp__111"<FLOAT,?> ⬅️ ::Sqrt(%"add_6__105")
576 | # n1__111
%"rsqrt_2__105"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__111")
577 | # n0__112
%"mul_10__105"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_6__105", %"rsqrt_2__105")
578 | # Cast_13__105
%"_to_copy_7__105"<FLOAT16,?> ⬅️ ::Cast(%"mul_10__105") {to=10}
579 | # n0__113
%"model_layers_1_input_layernorm_1__104"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.1.input_layernorm.weight", %"_to_copy_7__105")
580 | # n0__117
%"tmp__117"<INT64,[2]> ⬅️ ::Shape(%"model.layers.1.self_attn.q_proj.weight")
581 | # n1__117
%"rank__116"<INT64,?> ⬅️ ::Size(%"tmp__117")
582 | # n1__116
%"int64_2__116"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
583 | # n2__116
%"int64_2_cast__116"<INT64,?> ⬅️ ::CastLike(%"int64_2__116", %"rank__116")
584 | # n3__116
%"cond__116"<BOOL,?> ⬅️ ::Equal(%"rank__116", %"int64_2_cast__116")
585 | # n4__116
%"t_7__115"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__116") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__116"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__116_49
%"result__116"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.1.self_attn.q_proj.weight") {perm=[1, 0]}
return %"result__116"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__116"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__116_50
%"result_0__116"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.1.self_attn.q_proj.weight")
return %"result_0__116"<FLOAT16,[4096,4096]>
}}
586 | # Constant_3__115
%"_val_3__115"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
587 | # n0__118
%"size_0__118"<INT64,[2]> ⬅️ ::Cast(%"_val_3__115") {to=7}
588 | # n1__118
%"view_21__115"<FLOAT16,[unk__142,unk__143]> ⬅️ ::Reshape(%"model_layers_1_input_layernorm_1__104", %"size_0__118")
589 | # n0__119
%"mm_7__115"<FLOAT16,[unk__142,4096]> ⬅️ ::MatMul(%"view_21__115", %"t_7__115")
590 | # Constant_6__115
%"_val_6__115"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
591 | # n0__120
%"size_0__120"<INT64,[3]> ⬅️ ::Cast(%"_val_6__115") {to=7}
592 | # n1__120
%"model_layers_1_self_attn_q_proj_1__114"<FLOAT16,[unk__144,unk__145,unk__146]> ⬅️ ::Reshape(%"mm_7__115", %"size_0__120")
593 | # n0__123
%"tmp__123"<INT64,[2]> ⬅️ ::Shape(%"model.layers.1.self_attn.k_proj.weight")
594 | # n1__123
%"rank__122"<INT64,?> ⬅️ ::Size(%"tmp__123")
595 | # n1__122
%"int64_2__122"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
596 | # n2__122
%"int64_2_cast__122"<INT64,?> ⬅️ ::CastLike(%"int64_2__122", %"rank__122")
597 | # n3__122
%"cond__122"<BOOL,?> ⬅️ ::Equal(%"rank__122", %"int64_2_cast__122")
598 | # n4__122
%"t_8__121"<FLOAT16,[unk__147,unk__148]> ⬅️ ::If(%"cond__122") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__122"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__122_51
%"result__122"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.1.self_attn.k_proj.weight") {perm=[1, 0]}
return %"result__122"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__122"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__122_52
%"result_0__122"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.1.self_attn.k_proj.weight")
return %"result_0__122"<FLOAT16,[1024,4096]>
}}
599 | # Constant_3__121
%"_val_3__121"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
600 | # n0__124
%"size_0__124"<INT64,[2]> ⬅️ ::Cast(%"_val_3__121") {to=7}
601 | # n1__124
%"view_23__121"<FLOAT16,[unk__149,unk__150]> ⬅️ ::Reshape(%"model_layers_1_input_layernorm_1__104", %"size_0__124")
602 | # n0__125
%"mm_8__121"<FLOAT16,[unk__149,unk__148]> ⬅️ ::MatMul(%"view_23__121", %"t_8__121")
603 | # Constant_6__121
%"_val_6__121"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
604 | # n0__126
%"size_0__126"<INT64,[3]> ⬅️ ::Cast(%"_val_6__121") {to=7}
605 | # n1__126
%"model_layers_1_self_attn_k_proj_1__114"<FLOAT16,[unk__151,unk__152,unk__153]> ⬅️ ::Reshape(%"mm_8__121", %"size_0__126")
606 | # n0__129
%"tmp__129"<INT64,[2]> ⬅️ ::Shape(%"model.layers.1.self_attn.v_proj.weight")
607 | # n1__129
%"rank__128"<INT64,?> ⬅️ ::Size(%"tmp__129")
608 | # n1__128
%"int64_2__128"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
609 | # n2__128
%"int64_2_cast__128"<INT64,?> ⬅️ ::CastLike(%"int64_2__128", %"rank__128")
610 | # n3__128
%"cond__128"<BOOL,?> ⬅️ ::Equal(%"rank__128", %"int64_2_cast__128")
611 | # n4__128
%"t_9__127"<FLOAT16,[unk__154,unk__155]> ⬅️ ::If(%"cond__128") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__128"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__128_53
%"result__128"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.1.self_attn.v_proj.weight") {perm=[1, 0]}
return %"result__128"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__128"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__128_54
%"result_0__128"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.1.self_attn.v_proj.weight")
return %"result_0__128"<FLOAT16,[1024,4096]>
}}
612 | # Constant_3__127
%"_val_3__127"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
613 | # n0__130
%"size_0__130"<INT64,[2]> ⬅️ ::Cast(%"_val_3__127") {to=7}
614 | # n1__130
%"view_25__127"<FLOAT16,[unk__156,unk__157]> ⬅️ ::Reshape(%"model_layers_1_input_layernorm_1__104", %"size_0__130")
615 | # n0__131
%"mm_9__127"<FLOAT16,[unk__156,unk__155]> ⬅️ ::MatMul(%"view_25__127", %"t_9__127")
616 | # Constant_6__127
%"_val_6__127"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
617 | # n0__132
%"size_0__132"<INT64,[3]> ⬅️ ::Cast(%"_val_6__127") {to=7}
618 | # n1__132
%"model_layers_1_self_attn_v_proj_1__114"<FLOAT16,[unk__158,unk__159,unk__160]> ⬅️ ::Reshape(%"mm_9__127", %"size_0__132")
619 | # Constant_61__114
%"_val_8__114"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
620 | # n0__133
%"size_0__133"<INT64,[4]> ⬅️ ::Cast(%"_val_8__114") {to=7}
621 | # n1__133
%"view_27__114"<FLOAT16,[unk__161,unk__162,unk__163,unk__164]> ⬅️ ::Reshape(%"model_layers_1_self_attn_q_proj_1__114", %"size_0__133")
622 | # Transpose_63__114
%"transpose_4__114"<FLOAT16,[unk__161,unk__163,unk__162,unk__164]> ⬅️ ::Transpose(%"view_27__114") {perm=[0, 2, 1, 3]}
623 | # Constant_64__114
%"_val_11__114"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
624 | # n0__134
%"size_0__134"<INT64,[4]> ⬅️ ::Cast(%"_val_11__114") {to=7}
625 | # n1__134
%"view_28__114"<FLOAT16,[unk__165,unk__166,unk__167,unk__168]> ⬅️ ::Reshape(%"model_layers_1_self_attn_k_proj_1__114", %"size_0__134")
626 | # Transpose_66__114
%"transpose_5__114"<FLOAT16,[unk__165,unk__167,unk__166,unk__168]> ⬅️ ::Transpose(%"view_28__114") {perm=[0, 2, 1, 3]}
627 | # Constant_67__114
%"_val_14__114"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
628 | # n0__135
%"size_0__135"<INT64,[4]> ⬅️ ::Cast(%"_val_14__114") {to=7}
629 | # n1__135
%"view_29__114"<FLOAT16,[unk__169,unk__170,unk__171,unk__172]> ⬅️ ::Reshape(%"model_layers_1_self_attn_v_proj_1__114", %"size_0__135")
630 | # Transpose_69__114
%"model_1_2"<FLOAT16,[1,8,128,128]> ⬅️ ::Transpose(%"view_29__114") {perm=[0, 2, 1, 3]}
631 | # Constant_8__136
%"_val_1__136"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
632 | # Cast_9__136
%"_val_2__136"<INT64,?> ⬅️ ::Cast(%"_val_1__136") {to=7}
633 | # Constant_10__136
%"_val_3__136"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
634 | # Reshape_11__136
%"_val_4__136"<INT64,[1]> ⬅️ ::Reshape(%"_val_2__136", %"_val_3__136") {allowzero=0}
635 | # Constant_12__136
%"_val_5__136"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
636 | # Cast_13__136
%"_val_6__136"<INT64,?> ⬅️ ::Cast(%"_val_5__136") {to=7}
637 | # Constant_14__136
%"_val_7__136"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
638 | # Reshape_15__136
%"_val_8__136"<INT64,[1]> ⬅️ ::Reshape(%"_val_6__136", %"_val_7__136") {allowzero=0}
639 | # Constant_16__136
%"_val_9__136"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
640 | # Cast_17__136
%"_val_10__136"<INT64,?> ⬅️ ::Cast(%"_val_9__136") {to=7}
641 | # Constant_18__136
%"_val_11__136"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
642 | # Reshape_19__136
%"_val_12__136"<INT64,[1]> ⬅️ ::Reshape(%"_val_10__136", %"_val_11__136") {allowzero=0}
643 | # Constant_20__136
%"_val_13__136"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
644 | # Cast_21__136
%"_val_14__136"<INT64,?> ⬅️ ::Cast(%"_val_13__136") {to=7}
645 | # Constant_22__136
%"_val_15__136"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
646 | # Reshape_23__136
%"_val_16__136"<INT64,[1]> ⬅️ ::Reshape(%"_val_14__136", %"_val_15__136") {allowzero=0}
647 | # Slice_24__136
%"model_layers_1_self_attn_rotary_emb_1__114"<FLOAT16,[unk__173,unk__174]> ⬅️ ::Slice(%"model.layers.1.self_attn.rotary_emb.cos_cached", %"_val_4__136", %"_val_8__136", %"_val_12__136", %"_val_16__136")
648 | # Constant_25__136
%"_val_19__136"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
649 | # Cast_26__136
%"_val_20__136"<INT64,?> ⬅️ ::Cast(%"_val_19__136") {to=7}
650 | # Constant_27__136
%"_val_21__136"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
651 | # Reshape_28__136
%"_val_22__136"<INT64,[1]> ⬅️ ::Reshape(%"_val_20__136", %"_val_21__136") {allowzero=0}
652 | # Constant_29__136
%"_val_23__136"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
653 | # Cast_30__136
%"_val_24__136"<INT64,?> ⬅️ ::Cast(%"_val_23__136") {to=7}
654 | # Constant_31__136
%"_val_25__136"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
655 | # Reshape_32__136
%"_val_26__136"<INT64,[1]> ⬅️ ::Reshape(%"_val_24__136", %"_val_25__136") {allowzero=0}
656 | # Constant_33__136
%"_val_27__136"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
657 | # Cast_34__136
%"_val_28__136"<INT64,?> ⬅️ ::Cast(%"_val_27__136") {to=7}
658 | # Constant_35__136
%"_val_29__136"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
659 | # Reshape_36__136
%"_val_30__136"<INT64,[1]> ⬅️ ::Reshape(%"_val_28__136", %"_val_29__136") {allowzero=0}
660 | # Constant_37__136
%"_val_31__136"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
661 | # Cast_38__136
%"_val_32__136"<INT64,?> ⬅️ ::Cast(%"_val_31__136") {to=7}
662 | # Constant_39__136
%"_val_33__136"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
663 | # Reshape_40__136
%"_val_34__136"<INT64,[1]> ⬅️ ::Reshape(%"_val_32__136", %"_val_33__136") {allowzero=0}
664 | # Slice_41__136
%"model_layers_1_self_attn_rotary_emb_1_1__114"<FLOAT16,[unk__175,unk__176]> ⬅️ ::Slice(%"model.layers.1.self_attn.rotary_emb.sin_cached", %"_val_22__136", %"_val_26__136", %"_val_30__136", %"_val_34__136")
665 | # Transpose_71__114
%"_val_21__114"<FLOAT16,[unk__173,unk__174]> ⬅️ ::Transpose(%"model_layers_1_self_attn_rotary_emb_1__114") {perm=[0, 1]}
666 | # Max_72__114
%"_val_22__114"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
667 | # Shape_73__114
%"_val_23__114"<INT64,[2]> ⬅️ ::Shape(%"_val_22__114") {start=0}
668 | # Expand_74__114
%"_val_24__114"<INT64,[unk__177,unk__178]> ⬅️ ::Expand(%"view__1", %"_val_23__114")
669 | # Constant_75__114
%"_val_25__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
670 | # Unsqueeze_76__114
%"_val_26__114"<INT64,[unk__177,unk__178,1]> ⬅️ ::Unsqueeze(%"_val_24__114", %"_val_25__114")
671 | # Concat_77__114
%"_val_27__114"<INT64,[unk__177,unk__178,1]> ⬅️ ::Concat(%"_val_26__114") {axis=-1}
672 | # GatherND_78__114
%"_val_28__114"<FLOAT16,[unk__177,unk__178,unk__174]> ⬅️ ::GatherND(%"_val_21__114", %"_val_27__114") {batch_dims=0}
673 | # Transpose_79__114
%"index_2__114"<FLOAT16,[unk__177,unk__178,unk__174]> ⬅️ ::Transpose(%"_val_28__114") {perm=[0, 1, 2]}
674 | # n0__137
%"dim__137"<INT64,?> ⬅️ ::Constant() {value_int=1}
675 | # n1__137
%"dim_0__137"<INT64,?> ⬅️ ::Cast(%"dim__137") {to=7}
676 | # n2__137
%"unsqueeze_5__114"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_2__114", %"dim_0__137")
677 | # Transpose_81__114
%"_val_31__114"<FLOAT16,[unk__175,unk__176]> ⬅️ ::Transpose(%"model_layers_1_self_attn_rotary_emb_1_1__114") {perm=[0, 1]}
678 | # Max_82__114
%"_val_32__114"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
679 | # Shape_83__114
%"_val_33__114"<INT64,[2]> ⬅️ ::Shape(%"_val_32__114") {start=0}
680 | # Expand_84__114
%"_val_34__114"<INT64,[unk__179,unk__180]> ⬅️ ::Expand(%"view__1", %"_val_33__114")
681 | # Constant_85__114
%"_val_35__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
682 | # Unsqueeze_86__114
%"_val_36__114"<INT64,[unk__179,unk__180,1]> ⬅️ ::Unsqueeze(%"_val_34__114", %"_val_35__114")
683 | # Concat_87__114
%"_val_37__114"<INT64,[unk__179,unk__180,1]> ⬅️ ::Concat(%"_val_36__114") {axis=-1}
684 | # GatherND_88__114
%"_val_38__114"<FLOAT16,[unk__179,unk__180,unk__176]> ⬅️ ::GatherND(%"_val_31__114", %"_val_37__114") {batch_dims=0}
685 | # Transpose_89__114
%"index_3__114"<FLOAT16,[unk__179,unk__180,unk__176]> ⬅️ ::Transpose(%"_val_38__114") {perm=[0, 1, 2]}
686 | # n0__138
%"dim__138"<INT64,?> ⬅️ ::Constant() {value_int=1}
687 | # n1__138
%"dim_0__138"<INT64,?> ⬅️ ::Cast(%"dim__138") {to=7}
688 | # n2__138
%"unsqueeze_6__114"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_3__114", %"dim_0__138")
689 | # n0__139
%"mul_12__114"<FLOAT16,?> ⬅️ ::Mul(%"transpose_4__114", %"unsqueeze_5__114")
690 | # Constant_92__114
%"_val_42__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
691 | # Cast_93__114
%"_val_43__114"<INT64,?> ⬅️ ::Cast(%"_val_42__114") {to=7}
692 | # Constant_94__114
%"_val_44__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
693 | # Reshape_95__114
%"_val_45__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_43__114", %"_val_44__114") {allowzero=0}
694 | # Constant_96__114
%"_val_46__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
695 | # Cast_97__114
%"_val_47__114"<INT64,?> ⬅️ ::Cast(%"_val_46__114") {to=7}
696 | # Constant_98__114
%"_val_48__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
697 | # Reshape_99__114
%"_val_49__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_47__114", %"_val_48__114") {allowzero=0}
698 | # Constant_100__114
%"_val_50__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
699 | # Cast_101__114
%"_val_51__114"<INT64,?> ⬅️ ::Cast(%"_val_50__114") {to=7}
700 | # Constant_102__114
%"_val_52__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
701 | # Reshape_103__114
%"_val_53__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_51__114", %"_val_52__114") {allowzero=0}
702 | # Constant_104__114
%"_val_54__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
703 | # Cast_105__114
%"_val_55__114"<INT64,?> ⬅️ ::Cast(%"_val_54__114") {to=7}
704 | # Constant_106__114
%"_val_56__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
705 | # Reshape_107__114
%"_val_57__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_55__114", %"_val_56__114") {allowzero=0}
706 | # Slice_108__114
%"slice_17__114"<FLOAT16,[unk__181,unk__182,unk__183,unk__184]> ⬅️ ::Slice(%"transpose_4__114", %"_val_45__114", %"_val_49__114", %"_val_53__114", %"_val_57__114")
707 | # Constant_109__114
%"_val_59__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
708 | # Cast_110__114
%"_val_60__114"<INT64,?> ⬅️ ::Cast(%"_val_59__114") {to=7}
709 | # Constant_111__114
%"_val_61__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
710 | # Reshape_112__114
%"_val_62__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_60__114", %"_val_61__114") {allowzero=0}
711 | # Constant_113__114
%"_val_63__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
712 | # Cast_114__114
%"_val_64__114"<INT64,?> ⬅️ ::Cast(%"_val_63__114") {to=7}
713 | # Constant_115__114
%"_val_65__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
714 | # Reshape_116__114
%"_val_66__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_64__114", %"_val_65__114") {allowzero=0}
715 | # Constant_117__114
%"_val_67__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
716 | # Cast_118__114
%"_val_68__114"<INT64,?> ⬅️ ::Cast(%"_val_67__114") {to=7}
717 | # Constant_119__114
%"_val_69__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
718 | # Reshape_120__114
%"_val_70__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_68__114", %"_val_69__114") {allowzero=0}
719 | # Constant_121__114
%"_val_71__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
720 | # Cast_122__114
%"_val_72__114"<INT64,?> ⬅️ ::Cast(%"_val_71__114") {to=7}
721 | # Constant_123__114
%"_val_73__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
722 | # Reshape_124__114
%"_val_74__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_72__114", %"_val_73__114") {allowzero=0}
723 | # Slice_125__114
%"slice_18__114"<FLOAT16,[unk__185,unk__186,unk__187,unk__188]> ⬅️ ::Slice(%"transpose_4__114", %"_val_62__114", %"_val_66__114", %"_val_70__114", %"_val_74__114")
724 | # n0__140
%"neg_2__114"<FLOAT16,[unk__185,unk__186,unk__187,unk__188]> ⬅️ ::Neg(%"slice_18__114")
725 | # SequenceConstruct_127__114
%"77__114"<Sequence(Tensor(FLOAT16)),[unk__189,unk__190,unk__191,unk__192]> ⬅️ ::SequenceConstruct(%"neg_2__114", %"slice_17__114")
726 | # n0__141
%"cat_2__114"<FLOAT16,[unk__189,unk__190,unk__191,unk__193]> ⬅️ ::ConcatFromSequence(%"77__114") {axis=-1}
727 | # n0__142
%"mul_13__114"<FLOAT16,?> ⬅️ ::Mul(%"cat_2__114", %"unsqueeze_6__114")
728 | # n0__143
%"alpha__143"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
729 | # n1__143
%"alpha_0__143"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__143", %"mul_13__114")
730 | # n2__143
%"other_1__143"<FLOAT16,?> ⬅️ ::Mul(%"mul_13__114", %"alpha_0__143")
731 | # n3__143
%"add_7__114"<FLOAT16,?> ⬅️ ::Add(%"mul_12__114", %"other_1__143")
732 | # n0__144
%"mul_14__114"<FLOAT16,?> ⬅️ ::Mul(%"transpose_5__114", %"unsqueeze_5__114")
733 | # Constant_132__114
%"_val_82__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
734 | # Cast_133__114
%"_val_83__114"<INT64,?> ⬅️ ::Cast(%"_val_82__114") {to=7}
735 | # Constant_134__114
%"_val_84__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
736 | # Reshape_135__114
%"_val_85__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_83__114", %"_val_84__114") {allowzero=0}
737 | # Constant_136__114
%"_val_86__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
738 | # Cast_137__114
%"_val_87__114"<INT64,?> ⬅️ ::Cast(%"_val_86__114") {to=7}
739 | # Constant_138__114
%"_val_88__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
740 | # Reshape_139__114
%"_val_89__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_87__114", %"_val_88__114") {allowzero=0}
741 | # Constant_140__114
%"_val_90__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
742 | # Cast_141__114
%"_val_91__114"<INT64,?> ⬅️ ::Cast(%"_val_90__114") {to=7}
743 | # Constant_142__114
%"_val_92__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
744 | # Reshape_143__114
%"_val_93__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_91__114", %"_val_92__114") {allowzero=0}
745 | # Constant_144__114
%"_val_94__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
746 | # Cast_145__114
%"_val_95__114"<INT64,?> ⬅️ ::Cast(%"_val_94__114") {to=7}
747 | # Constant_146__114
%"_val_96__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
748 | # Reshape_147__114
%"_val_97__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_95__114", %"_val_96__114") {allowzero=0}
749 | # Slice_148__114
%"slice_19__114"<FLOAT16,[unk__194,unk__195,unk__196,unk__197]> ⬅️ ::Slice(%"transpose_5__114", %"_val_85__114", %"_val_89__114", %"_val_93__114", %"_val_97__114")
750 | # Constant_149__114
%"_val_99__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
751 | # Cast_150__114
%"_val_100__114"<INT64,?> ⬅️ ::Cast(%"_val_99__114") {to=7}
752 | # Constant_151__114
%"_val_101__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
753 | # Reshape_152__114
%"_val_102__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_100__114", %"_val_101__114") {allowzero=0}
754 | # Constant_153__114
%"_val_103__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
755 | # Cast_154__114
%"_val_104__114"<INT64,?> ⬅️ ::Cast(%"_val_103__114") {to=7}
756 | # Constant_155__114
%"_val_105__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
757 | # Reshape_156__114
%"_val_106__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_104__114", %"_val_105__114") {allowzero=0}
758 | # Constant_157__114
%"_val_107__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
759 | # Cast_158__114
%"_val_108__114"<INT64,?> ⬅️ ::Cast(%"_val_107__114") {to=7}
760 | # Constant_159__114
%"_val_109__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
761 | # Reshape_160__114
%"_val_110__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_108__114", %"_val_109__114") {allowzero=0}
762 | # Constant_161__114
%"_val_111__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
763 | # Cast_162__114
%"_val_112__114"<INT64,?> ⬅️ ::Cast(%"_val_111__114") {to=7}
764 | # Constant_163__114
%"_val_113__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
765 | # Reshape_164__114
%"_val_114__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_112__114", %"_val_113__114") {allowzero=0}
766 | # Slice_165__114
%"slice_20__114"<FLOAT16,[unk__198,unk__199,unk__200,unk__201]> ⬅️ ::Slice(%"transpose_5__114", %"_val_102__114", %"_val_106__114", %"_val_110__114", %"_val_114__114")
767 | # n0__145
%"neg_3__114"<FLOAT16,[unk__198,unk__199,unk__200,unk__201]> ⬅️ ::Neg(%"slice_20__114")
768 | # SequenceConstruct_167__114
%"117__114"<Sequence(Tensor(FLOAT16)),[unk__202,unk__203,unk__204,unk__205]> ⬅️ ::SequenceConstruct(%"neg_3__114", %"slice_19__114")
769 | # n0__146
%"cat_3__114"<FLOAT16,[unk__202,unk__203,unk__204,unk__206]> ⬅️ ::ConcatFromSequence(%"117__114") {axis=-1}
770 | # n0__147
%"mul_15__114"<FLOAT16,?> ⬅️ ::Mul(%"cat_3__114", %"unsqueeze_6__114")
771 | # n0__148
%"alpha__148"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
772 | # n1__148
%"alpha_0__148"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__148", %"mul_15__114")
773 | # n2__148
%"other_1__148"<FLOAT16,?> ⬅️ ::Mul(%"mul_15__114", %"alpha_0__148")
774 | # n3__148
%"model_1_3"<FLOAT16,[1,8,128,128]> ⬅️ ::Add(%"mul_14__114", %"other_1__148")
775 | # Constant_171__114
%"_val_121__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
776 | # Cast_172__114
%"_val_122__114"<INT64,?> ⬅️ ::Cast(%"_val_121__114") {to=7}
777 | # Constant_173__114
%"_val_123__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
778 | # Reshape_174__114
%"_val_124__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_122__114", %"_val_123__114") {allowzero=0}
779 | # Constant_175__114
%"_val_125__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
780 | # Cast_176__114
%"_val_126__114"<INT64,?> ⬅️ ::Cast(%"_val_125__114") {to=7}
781 | # Constant_177__114
%"_val_127__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
782 | # Reshape_178__114
%"_val_128__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_126__114", %"_val_127__114") {allowzero=0}
783 | # Constant_179__114
%"_val_129__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
784 | # Cast_180__114
%"_val_130__114"<INT64,?> ⬅️ ::Cast(%"_val_129__114") {to=7}
785 | # Constant_181__114
%"_val_131__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
786 | # Reshape_182__114
%"_val_132__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_130__114", %"_val_131__114") {allowzero=0}
787 | # Constant_183__114
%"_val_133__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
788 | # Cast_184__114
%"_val_134__114"<INT64,?> ⬅️ ::Cast(%"_val_133__114") {to=7}
789 | # Constant_185__114
%"_val_135__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
790 | # Reshape_186__114
%"_val_136__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_134__114", %"_val_135__114") {allowzero=0}
791 | # Slice_187__114
%"slice_21__114"<FLOAT16,[unk__207,unk__208,unk__209,unk__210]> ⬅️ ::Slice(%"model_1_3", %"_val_124__114", %"_val_128__114", %"_val_132__114", %"_val_136__114")
792 | # Constant_188__114
%"_val_138__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
793 | # Cast_189__114
%"_val_139__114"<INT64,?> ⬅️ ::Cast(%"_val_138__114") {to=7}
794 | # Constant_190__114
%"_val_140__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
795 | # Reshape_191__114
%"_val_141__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_139__114", %"_val_140__114") {allowzero=0}
796 | # Constant_192__114
%"_val_142__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
797 | # Cast_193__114
%"_val_143__114"<INT64,?> ⬅️ ::Cast(%"_val_142__114") {to=7}
798 | # Constant_194__114
%"_val_144__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
799 | # Reshape_195__114
%"_val_145__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_143__114", %"_val_144__114") {allowzero=0}
800 | # Constant_196__114
%"_val_146__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
801 | # Cast_197__114
%"_val_147__114"<INT64,?> ⬅️ ::Cast(%"_val_146__114") {to=7}
802 | # Constant_198__114
%"_val_148__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
803 | # Reshape_199__114
%"_val_149__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_147__114", %"_val_148__114") {allowzero=0}
804 | # Constant_200__114
%"_val_150__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
805 | # Cast_201__114
%"_val_151__114"<INT64,?> ⬅️ ::Cast(%"_val_150__114") {to=7}
806 | # Constant_202__114
%"_val_152__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
807 | # Reshape_203__114
%"_val_153__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_151__114", %"_val_152__114") {allowzero=0}
808 | # Slice_204__114
%"slice_22__114"<FLOAT16,[unk__211,unk__212,unk__213,unk__214]> ⬅️ ::Slice(%"slice_21__114", %"_val_141__114", %"_val_145__114", %"_val_149__114", %"_val_153__114")
809 | # n0__149
%"dim__149"<INT64,?> ⬅️ ::Constant() {value_int=2}
810 | # n1__149
%"dim_0__149"<INT64,?> ⬅️ ::Cast(%"dim__149") {to=7}
811 | # n2__149
%"unsqueeze_7__114"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_22__114", %"dim_0__149")
812 | # Constant_206__114
%"_val_156__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
813 | # Cast_207__114
%"_val_157__114"<INT64,?> ⬅️ ::Cast(%"_val_156__114") {to=7}
814 | # Constant_208__114
%"_val_158__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
815 | # Reshape_209__114
%"_val_159__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_157__114", %"_val_158__114") {allowzero=0}
816 | # Constant_210__114
%"_val_160__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
817 | # Cast_211__114
%"_val_161__114"<INT64,?> ⬅️ ::Cast(%"_val_160__114") {to=7}
818 | # Constant_212__114
%"_val_162__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
819 | # Reshape_213__114
%"_val_163__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_161__114", %"_val_162__114") {allowzero=0}
820 | # Constant_214__114
%"_val_164__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
821 | # Cast_215__114
%"_val_165__114"<INT64,?> ⬅️ ::Cast(%"_val_164__114") {to=7}
822 | # Constant_216__114
%"_val_166__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
823 | # Reshape_217__114
%"_val_167__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_165__114", %"_val_166__114") {allowzero=0}
824 | # Constant_218__114
%"_val_168__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
825 | # Cast_219__114
%"_val_169__114"<INT64,?> ⬅️ ::Cast(%"_val_168__114") {to=7}
826 | # Constant_220__114
%"_val_170__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
827 | # Reshape_221__114
%"_val_171__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_169__114", %"_val_170__114") {allowzero=0}
828 | # Slice_222__114
%"slice_23__114"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_7__114", %"_val_159__114", %"_val_163__114", %"_val_167__114", %"_val_171__114")
829 | # Constant_223__114
%"_val_173__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
830 | # Cast_224__114
%"_val_174__114"<INT64,?> ⬅️ ::Cast(%"_val_173__114") {to=7}
831 | # Constant_225__114
%"_val_175__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
832 | # Reshape_226__114
%"_val_176__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_174__114", %"_val_175__114") {allowzero=0}
833 | # Constant_227__114
%"_val_177__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
834 | # Cast_228__114
%"_val_178__114"<INT64,?> ⬅️ ::Cast(%"_val_177__114") {to=7}
835 | # Constant_229__114
%"_val_179__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
836 | # Reshape_230__114
%"_val_180__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_178__114", %"_val_179__114") {allowzero=0}
837 | # Constant_231__114
%"_val_181__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
838 | # Cast_232__114
%"_val_182__114"<INT64,?> ⬅️ ::Cast(%"_val_181__114") {to=7}
839 | # Constant_233__114
%"_val_183__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
840 | # Reshape_234__114
%"_val_184__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_182__114", %"_val_183__114") {allowzero=0}
841 | # Constant_235__114
%"_val_185__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
842 | # Cast_236__114
%"_val_186__114"<INT64,?> ⬅️ ::Cast(%"_val_185__114") {to=7}
843 | # Constant_237__114
%"_val_187__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
844 | # Reshape_238__114
%"_val_188__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_186__114", %"_val_187__114") {allowzero=0}
845 | # Slice_239__114
%"slice_24__114"<FLOAT16,?> ⬅️ ::Slice(%"slice_23__114", %"_val_176__114", %"_val_180__114", %"_val_184__114", %"_val_188__114")
846 | # Constant_240__114
%"_val_190__114"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
847 | # n0__150
%"size_0__150"<INT64,[5]> ⬅️ ::Cast(%"_val_190__114") {to=7}
848 | # n1__150
%"size_1__150"<INT64,[5]> ⬅️ ::Abs(%"size_0__150")
849 | # n2__150
%"expand_2__114"<FLOAT16,?> ⬅️ ::Expand(%"slice_24__114", %"size_1__150")
850 | # n0__151
%"clone_2__114"<FLOAT16,?> ⬅️ ::Identity(%"expand_2__114")
851 | # Constant_243__114
%"_val_193__114"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
852 | # n0__152
%"size_0__152"<INT64,[4]> ⬅️ ::Cast(%"_val_193__114") {to=7}
853 | # n1__152
%"view_30__114"<FLOAT16,[unk__215,unk__216,unk__217,unk__218]> ⬅️ ::Reshape(%"clone_2__114", %"size_0__152")
854 | # Constant_245__114
%"_val_195__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
855 | # Cast_246__114
%"_val_196__114"<INT64,?> ⬅️ ::Cast(%"_val_195__114") {to=7}
856 | # Constant_247__114
%"_val_197__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
857 | # Reshape_248__114
%"_val_198__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_196__114", %"_val_197__114") {allowzero=0}
858 | # Constant_249__114
%"_val_199__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
859 | # Cast_250__114
%"_val_200__114"<INT64,?> ⬅️ ::Cast(%"_val_199__114") {to=7}
860 | # Constant_251__114
%"_val_201__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
861 | # Reshape_252__114
%"_val_202__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_200__114", %"_val_201__114") {allowzero=0}
862 | # Constant_253__114
%"_val_203__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
863 | # Cast_254__114
%"_val_204__114"<INT64,?> ⬅️ ::Cast(%"_val_203__114") {to=7}
864 | # Constant_255__114
%"_val_205__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
865 | # Reshape_256__114
%"_val_206__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_204__114", %"_val_205__114") {allowzero=0}
866 | # Constant_257__114
%"_val_207__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
867 | # Cast_258__114
%"_val_208__114"<INT64,?> ⬅️ ::Cast(%"_val_207__114") {to=7}
868 | # Constant_259__114
%"_val_209__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
869 | # Reshape_260__114
%"_val_210__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_208__114", %"_val_209__114") {allowzero=0}
870 | # Slice_261__114
%"slice_25__114"<FLOAT16,[unk__219,unk__220,unk__221,unk__222]> ⬅️ ::Slice(%"model_1_2", %"_val_198__114", %"_val_202__114", %"_val_206__114", %"_val_210__114")
871 | # Constant_262__114
%"_val_212__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
872 | # Cast_263__114
%"_val_213__114"<INT64,?> ⬅️ ::Cast(%"_val_212__114") {to=7}
873 | # Constant_264__114
%"_val_214__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
874 | # Reshape_265__114
%"_val_215__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_213__114", %"_val_214__114") {allowzero=0}
875 | # Constant_266__114
%"_val_216__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
876 | # Cast_267__114
%"_val_217__114"<INT64,?> ⬅️ ::Cast(%"_val_216__114") {to=7}
877 | # Constant_268__114
%"_val_218__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
878 | # Reshape_269__114
%"_val_219__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_217__114", %"_val_218__114") {allowzero=0}
879 | # Constant_270__114
%"_val_220__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
880 | # Cast_271__114
%"_val_221__114"<INT64,?> ⬅️ ::Cast(%"_val_220__114") {to=7}
881 | # Constant_272__114
%"_val_222__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
882 | # Reshape_273__114
%"_val_223__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_221__114", %"_val_222__114") {allowzero=0}
883 | # Constant_274__114
%"_val_224__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
884 | # Cast_275__114
%"_val_225__114"<INT64,?> ⬅️ ::Cast(%"_val_224__114") {to=7}
885 | # Constant_276__114
%"_val_226__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
886 | # Reshape_277__114
%"_val_227__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_225__114", %"_val_226__114") {allowzero=0}
887 | # Slice_278__114
%"slice_26__114"<FLOAT16,[unk__223,unk__224,unk__225,unk__226]> ⬅️ ::Slice(%"slice_25__114", %"_val_215__114", %"_val_219__114", %"_val_223__114", %"_val_227__114")
888 | # n0__153
%"dim__153"<INT64,?> ⬅️ ::Constant() {value_int=2}
889 | # n1__153
%"dim_0__153"<INT64,?> ⬅️ ::Cast(%"dim__153") {to=7}
890 | # n2__153
%"unsqueeze_8__114"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_26__114", %"dim_0__153")
891 | # Constant_280__114
%"_val_230__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
892 | # Cast_281__114
%"_val_231__114"<INT64,?> ⬅️ ::Cast(%"_val_230__114") {to=7}
893 | # Constant_282__114
%"_val_232__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
894 | # Reshape_283__114
%"_val_233__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_231__114", %"_val_232__114") {allowzero=0}
895 | # Constant_284__114
%"_val_234__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
896 | # Cast_285__114
%"_val_235__114"<INT64,?> ⬅️ ::Cast(%"_val_234__114") {to=7}
897 | # Constant_286__114
%"_val_236__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
898 | # Reshape_287__114
%"_val_237__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_235__114", %"_val_236__114") {allowzero=0}
899 | # Constant_288__114
%"_val_238__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
900 | # Cast_289__114
%"_val_239__114"<INT64,?> ⬅️ ::Cast(%"_val_238__114") {to=7}
901 | # Constant_290__114
%"_val_240__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
902 | # Reshape_291__114
%"_val_241__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_239__114", %"_val_240__114") {allowzero=0}
903 | # Constant_292__114
%"_val_242__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
904 | # Cast_293__114
%"_val_243__114"<INT64,?> ⬅️ ::Cast(%"_val_242__114") {to=7}
905 | # Constant_294__114
%"_val_244__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
906 | # Reshape_295__114
%"_val_245__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_243__114", %"_val_244__114") {allowzero=0}
907 | # Slice_296__114
%"slice_27__114"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_8__114", %"_val_233__114", %"_val_237__114", %"_val_241__114", %"_val_245__114")
908 | # Constant_297__114
%"_val_247__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
909 | # Cast_298__114
%"_val_248__114"<INT64,?> ⬅️ ::Cast(%"_val_247__114") {to=7}
910 | # Constant_299__114
%"_val_249__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
911 | # Reshape_300__114
%"_val_250__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_248__114", %"_val_249__114") {allowzero=0}
912 | # Constant_301__114
%"_val_251__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
913 | # Cast_302__114
%"_val_252__114"<INT64,?> ⬅️ ::Cast(%"_val_251__114") {to=7}
914 | # Constant_303__114
%"_val_253__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
915 | # Reshape_304__114
%"_val_254__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_252__114", %"_val_253__114") {allowzero=0}
916 | # Constant_305__114
%"_val_255__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
917 | # Cast_306__114
%"_val_256__114"<INT64,?> ⬅️ ::Cast(%"_val_255__114") {to=7}
918 | # Constant_307__114
%"_val_257__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
919 | # Reshape_308__114
%"_val_258__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_256__114", %"_val_257__114") {allowzero=0}
920 | # Constant_309__114
%"_val_259__114"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
921 | # Cast_310__114
%"_val_260__114"<INT64,?> ⬅️ ::Cast(%"_val_259__114") {to=7}
922 | # Constant_311__114
%"_val_261__114"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
923 | # Reshape_312__114
%"_val_262__114"<INT64,[1]> ⬅️ ::Reshape(%"_val_260__114", %"_val_261__114") {allowzero=0}
924 | # Slice_313__114
%"slice_28__114"<FLOAT16,?> ⬅️ ::Slice(%"slice_27__114", %"_val_250__114", %"_val_254__114", %"_val_258__114", %"_val_262__114")
925 | # Constant_314__114
%"_val_264__114"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
926 | # n0__154
%"size_0__154"<INT64,[5]> ⬅️ ::Cast(%"_val_264__114") {to=7}
927 | # n1__154
%"size_1__154"<INT64,[5]> ⬅️ ::Abs(%"size_0__154")
928 | # n2__154
%"expand_3__114"<FLOAT16,?> ⬅️ ::Expand(%"slice_28__114", %"size_1__154")
929 | # n0__155
%"clone_3__114"<FLOAT16,?> ⬅️ ::Identity(%"expand_3__114")
930 | # Constant_317__114
%"_val_267__114"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
931 | # n0__156
%"size_0__156"<INT64,[4]> ⬅️ ::Cast(%"_val_267__114") {to=7}
932 | # n1__156
%"view_31__114"<FLOAT16,[unk__227,unk__228,unk__229,unk__230]> ⬅️ ::Reshape(%"clone_3__114", %"size_0__156")
933 | # n0__157
%"tmp__157"<INT64,[unk__231]> ⬅️ ::Shape(%"add_7__114")
934 | # n1__157
%"int64_m1__157"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_m1')}
935 | # n2__157
%"tmp_subscripted__157"<INT64,?> ⬅️ ::Gather(%"tmp__157", %"int64_m1__157") {axis=0}
936 | # n3__157
%"embedding_size__157"<FLOAT16,?> ⬅️ ::CastLike(%"tmp_subscripted__157", %"add_7__114")
937 | # n4__157
%"const__157"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
938 | # n5__157
%"tmp_0__157"<FLOAT16,?> ⬅️ ::Sqrt(%"embedding_size__157")
939 | # n6__157
%"const_cast__157"<FLOAT16,?> ⬅️ ::CastLike(%"const__157", %"tmp_0__157")
940 | # n7__157
%"_val_269__114"<FLOAT16,?> ⬅️ ::Div(%"const_cast__157", %"tmp_0__157")
941 | # CastLike_320__114
%"_val_270__114"<FLOAT16,?> ⬅️ ::CastLike(%"_val_269__114", %"add_7__114")
942 | # n0__158
%"tmp__158"<INT64,[unk__232]> ⬅️ ::Shape(%"add_7__114")
943 | # n1__158
%"int64_0_1d__158"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
944 | # n2__158
%"int64_1_1d__158"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
945 | # n3__158
%"int64_m2_1d__158"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
946 | # n4__158
%"int64_m1_1d__158"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
947 | # n5__158
%"target_length__158"<INT64,[unk__233]> ⬅️ ::Slice(%"tmp__158", %"int64_m2_1d__158", %"int64_m1_1d__158", %"int64_0_1d__158", %"int64_1_1d__158")
948 | # n6__158
%"tmp_0__158"<INT64,[4]> ⬅️ ::Shape(%"view_30__114")
949 | # n7__158
%"int64_0_1d_1__158"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
950 | # n8__158
%"int64_1_1d_2__158"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
951 | # n9__158
%"int64_m2_1d_3__158"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_3')}
952 | # n10__158
%"int64_m1_1d_4__158"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_4')}
953 | # n11__158
%"source_length__158"<INT64,[1]> ⬅️ ::Slice(%"tmp_0__158", %"int64_m2_1d_3__158", %"int64_m1_1d_4__158", %"int64_0_1d_1__158", %"int64_1_1d_2__158")
954 | # n12__158
%"size__158"<INT64,[unk__234]> ⬅️ ::Concat(%"target_length__158", %"source_length__158") {axis=0}
955 | # n13__158
%"const__158"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
956 | # n14__158
%"attn_mask__158"<FLOAT,?> ⬅️ ::Expand(%"const__158", %"size__158")
957 | # n15__158
%"attn_mask_5__158"<FLOAT,?> ⬅️ ::Trilu(%"attn_mask__158") {upper=0}
958 | # n16__158
%"const_6__158"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_6')}
959 | # n17__158
%"const_6_cast__158"<FLOAT,?> ⬅️ ::CastLike(%"const_6__158", %"attn_mask_5__158")
960 | # n18__158
%"tmp_7__158"<BOOL,?> ⬅️ ::Equal(%"attn_mask_5__158", %"const_6_cast__158")
961 | # n19__158
%"tmp_8__158"<FLOAT,?> ⬅️ ::Constant() {value_float=-inf}
962 | # n20__158
%"const_9__158"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
963 | # n21__158
%"const_9_cast__158"<FLOAT,?> ⬅️ ::CastLike(%"const_9__158", %"tmp_8__158")
964 | # n22__158
%"attn_mask_10__158"<FLOAT,?> ⬅️ ::Where(%"tmp_7__158", %"tmp_8__158", %"const_9_cast__158")
965 | # n23__158
%"_val_271__114"<FLOAT16,?> ⬅️ ::CastLike(%"attn_mask_10__158", %"add_7__114")
966 | # n0__159
%"key_shape__159"<INT64,[4]> ⬅️ ::Shape(%"view_30__114")
967 | # n1__159
%"int64_0_1d__159"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
968 | # n2__159
%"int64_1_1d__159"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
969 | # n3__159
%"int64_m1_1d__159"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
970 | # n4__159
%"int64_9223372036854775807_1d__159"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_9223372036854775807_1d')}
971 | # n5__159
%"key_last_dim__159"<INT64,[1]> ⬅️ ::Slice(%"key_shape__159", %"int64_m1_1d__159", %"int64_9223372036854775807_1d__159", %"int64_0_1d__159", %"int64_1_1d__159")
972 | # n6__159
%"int64_0_1d_0__159"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_0')}
973 | # n7__159
%"int64_1_1d_1__159"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_1')}
974 | # n8__159
%"int64_m2_1d__159"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
975 | # n9__159
%"int64_m1_1d_2__159"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_2')}
976 | # n10__159
%"key_second_last_dim__159"<INT64,[1]> ⬅️ ::Slice(%"key_shape__159", %"int64_m2_1d__159", %"int64_m1_1d_2__159", %"int64_0_1d_0__159", %"int64_1_1d_1__159")
977 | # n11__159
%"int64_0_1d_3__159"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
978 | # n12__159
%"int64_1_1d_4__159"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
979 | # n13__159
%"int64_m2_1d_5__159"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_5')}
980 | # n14__159
%"key_first_dims__159"<INT64,[2]> ⬅️ ::Slice(%"key_shape__159", %"int64_0_1d_3__159", %"int64_m2_1d_5__159", %"int64_0_1d_3__159", %"int64_1_1d_4__159")
981 | # n15__159
%"tmp__159"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
982 | # n16__159
%"key_squeezed_shape__159"<INT64,[3]> ⬅️ ::Concat(%"tmp__159", %"key_second_last_dim__159", %"key_last_dim__159") {axis=0}
983 | # n17__159
%"key_squeezed__159"<FLOAT16,[unk__235,unk__236,unk__237]> ⬅️ ::Reshape(%"view_30__114", %"key_squeezed_shape__159")
984 | # n18__159
%"key_squeezed_transposed__159"<FLOAT16,[unk__235,unk__237,unk__236]> ⬅️ ::Transpose(%"key_squeezed__159") {perm=[0, 2, 1]}
985 | # n19__159
%"key_transposed_shape__159"<INT64,[4]> ⬅️ ::Concat(%"key_first_dims__159", %"key_last_dim__159", %"key_second_last_dim__159") {axis=0}
986 | # n20__159
%"key_transposed__159"<FLOAT16,[unk__238,unk__239,unk__240,unk__241]> ⬅️ ::Reshape(%"key_squeezed_transposed__159", %"key_transposed_shape__159")
987 | # n21__159
%"tmp_6__159"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__114")
988 | # n22__159
%"query_scaled__159"<FLOAT16,?> ⬅️ ::Mul(%"add_7__114", %"tmp_6__159")
989 | # n23__159
%"tmp_7__159"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__114")
990 | # n24__159
%"key_transposed_scaled__159"<FLOAT16,[unk__238,unk__239,unk__240,unk__241]> ⬅️ ::Mul(%"key_transposed__159", %"tmp_7__159")
991 | # n25__159
%"tmp_8__159"<FLOAT16,?> ⬅️ ::MatMul(%"query_scaled__159", %"key_transposed_scaled__159")
992 | # n26__159
%"tmp_9__159"<FLOAT16,?> ⬅️ ::Add(%"tmp_8__159", %"_val_271__114")
993 | # n27__159
%"attn_weight__159"<FLOAT16,?> ⬅️ ::Softmax(%"tmp_9__159") {axis=-1}
994 | # n28__159
%"dropout_p__159"<FLOAT,?> ⬅️ ::Constant() {value_float=0.0}
995 | # n29__159
%"attn_weight_10__159"<FLOAT16,?>, %"___159"<BOOL,?> ⬅️ ::Dropout(%"attn_weight__159", %"dropout_p__159")
996 | # n30__159
%"_scaled_dot_product_efficient_attention_1__114"<FLOAT16,?> ⬅️ ::MatMul(%"attn_weight_10__159", %"view_31__114")
997 | # n0__160
%"query_0__160"<FLOAT16,?> ⬅️ ::Transpose(%"add_7__114") {perm=[0, 2, 1, 3]}
998 | # n1__160
%"query_shape__160"<INT64,[unk__242]> ⬅️ ::Shape(%"query_0__160")
999 | # n2__160
%"int64_0_1d__160"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
1000 | # n3__160
%"int64_1_1d__160"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
1001 | # n4__160
%"query_first_dims__160"<INT64,[unk__243]> ⬅️ ::Slice(%"query_shape__160", %"int64_0_1d__160", %"int64_1_1d__160", %"int64_0_1d__160", %"int64_1_1d__160")
1002 | # n5__160
%"int64_0_1d_1__160"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
1003 | # n6__160
%"int64_1_1d_2__160"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
1004 | # n7__160
%"int64_2_1d__160"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_2_1d')}
1005 | # n8__160
%"query_second_dims__160"<INT64,[unk__244]> ⬅️ ::Slice(%"query_shape__160", %"int64_1_1d_2__160", %"int64_2_1d__160", %"int64_0_1d_1__160", %"int64_1_1d_2__160")
1006 | # n9__160
%"int64_0_1d_3__160"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
1007 | # n10__160
%"int64_1_1d_4__160"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
1008 | # n11__160
%"int64_m2_1d__160"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
1009 | # n12__160
%"int64_m1_1d__160"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
1010 | # n13__160
%"num_heads__160"<INT64,[unk__245]> ⬅️ ::Slice(%"query_shape__160", %"int64_m2_1d__160", %"int64_m1_1d__160", %"int64_0_1d_3__160", %"int64_1_1d_4__160")
1011 | # n14__160
%"compute_log_sumexp__160"<INT64,?> ⬅️ ::Constant() {value_int=0}
1012 | # n15__160
%"compute_log_sumexp_as_bool__160"<BOOL,?> ⬅️ ::Cast(%"compute_log_sumexp__160") {to=9}
1013 | # n16__160
%"_scaled_dot_product_efficient_attention_1_1__114"<FLOAT,?> ⬅️ ::If(%"compute_log_sumexp_as_bool__160") {then_branch=
graph(
name=thenGraph_14,
inputs=(
),
outputs=(
%"logsum_exp__160"<FLOAT,?>
),
) {
0 | # n0__160_55
%"tmp__160"<FLOAT,[unk__244]> ⬅️ ::Cast(%"query_second_dims__160") {to=1}
1 | # n1__160_56
%"const__160"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2 | # n2__160_57
%"const_cast__160"<FLOAT,?> ⬅️ ::CastLike(%"const__160", %"tmp__160")
3 | # n3__160_58
%"tmp_5__160"<FLOAT,[unk__244]> ⬅️ ::Div(%"tmp__160", %"const_cast__160")
4 | # n4__160_59
%"tmp_6__160"<FLOAT,[unk__244]> ⬅️ ::Ceil(%"tmp_5__160")
5 | # n5__160_60
%"const_7__160"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_7')}
6 | # n6__160_61
%"const_7_cast__160"<FLOAT,?> ⬅️ ::CastLike(%"const_7__160", %"tmp_6__160")
7 | # n7__160_62
%"tmp_8__160"<FLOAT,[unk__244]> ⬅️ ::Mul(%"tmp_6__160", %"const_7_cast__160")
8 | # n8__160_63
%"logsumexp_dim__160"<INT64,[unk__244]> ⬅️ ::Cast(%"tmp_8__160") {to=7}
9 | # n9__160_64
%"const_9__160"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
10 | # n10__160_65
%"tmp_10__160"<INT64,[unk__246]> ⬅️ ::Concat(%"query_first_dims__160", %"num_heads__160", %"logsumexp_dim__160") {axis=0}
11 | # n11__160_66
%"logsum_exp__160"<FLOAT,?> ⬅️ ::Expand(%"const_9__160", %"tmp_10__160")
return %"logsum_exp__160"<FLOAT,?>
}, else_branch=
graph(
name=elseGraph_14,
inputs=(
),
outputs=(
%"logsum_exp_14__160"<FLOAT,?>
),
) {
0 | # n0__160_67
%"const_11__160"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_11')}
1 | # n1__160_68
%"int64_0_1d_12__160"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_12')}
2 | # n2__160_69
%"int64_0_1d_12_cast__160"<INT64,[1]> ⬅️ ::CastLike(%"int64_0_1d_12__160", %"num_heads__160")
3 | # n3__160_70
%"tmp_13__160"<INT64,[unk__247]> ⬅️ ::Concat(%"query_first_dims__160", %"num_heads__160", %"int64_0_1d_12_cast__160") {axis=0}
4 | # n4__160_71
%"logsum_exp_14__160"<FLOAT,?> ⬅️ ::Expand(%"const_11__160", %"tmp_13__160")
return %"logsum_exp_14__160"<FLOAT,?>
}}
1014 | # n17__160
%"tmp_16__160"<INT64,[0]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[0]>('Empty_INTS')}
1015 | # n18__160
%"tmp_17__160"<FLOAT,?> ⬅️ ::ConstantOfShape(%"tmp_16__160")
1016 | # n19__160
%"_scaled_dot_product_efficient_attention_1_3__114"<INT64,?> ⬅️ ::Cast(%"tmp_17__160") {to=7}
1017 | # Transpose_324__114
%"transpose_7__114"<FLOAT16,?> ⬅️ ::Transpose(%"_scaled_dot_product_efficient_attention_1__114") {perm=[0, 2, 1, 3]}
1018 | # Constant_325__114
%"_val_276__114"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1019 | # n0__161
%"size_0__161"<INT64,[3]> ⬅️ ::Cast(%"_val_276__114") {to=7}
1020 | # n1__161
%"view_32__114"<FLOAT16,[unk__248,unk__249,unk__250]> ⬅️ ::Reshape(%"transpose_7__114", %"size_0__161")
1021 | # n0__164
%"tmp__164"<INT64,[2]> ⬅️ ::Shape(%"model.layers.1.self_attn.o_proj.weight")
1022 | # n1__164
%"rank__163"<INT64,?> ⬅️ ::Size(%"tmp__164")
1023 | # n1__163
%"int64_2__163"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1024 | # n2__163
%"int64_2_cast__163"<INT64,?> ⬅️ ::CastLike(%"int64_2__163", %"rank__163")
1025 | # n3__163
%"cond__163"<BOOL,?> ⬅️ ::Equal(%"rank__163", %"int64_2_cast__163")
1026 | # n4__163
%"t_10__162"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__163") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__163"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__163_72
%"result__163"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.1.self_attn.o_proj.weight") {perm=[1, 0]}
return %"result__163"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__163"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__163_73
%"result_0__163"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.1.self_attn.o_proj.weight")
return %"result_0__163"<FLOAT16,[4096,4096]>
}}
1027 | # Constant_3__162
%"_val_3__162"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1028 | # n0__165
%"size_0__165"<INT64,[2]> ⬅️ ::Cast(%"_val_3__162") {to=7}
1029 | # n1__165
%"view_33__162"<FLOAT16,[unk__251,unk__252]> ⬅️ ::Reshape(%"view_32__114", %"size_0__165")
1030 | # n0__166
%"mm_10__162"<FLOAT16,[unk__251,4096]> ⬅️ ::MatMul(%"view_33__162", %"t_10__162")
1031 | # Constant_6__162
%"_val_6__162"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1032 | # n0__167
%"size_0__167"<INT64,[3]> ⬅️ ::Cast(%"_val_6__162") {to=7}
1033 | # n1__167
%"model_layers_1_self_attn_1_2__104"<FLOAT16,[unk__253,unk__254,unk__255]> ⬅️ ::Reshape(%"mm_10__162", %"size_0__167")
1034 | # n0__168
%"alpha__168"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
1035 | # n1__168
%"alpha_0__168"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__168", %"model_layers_1_self_attn_1_2__104")
1036 | # n2__168
%"other_1__168"<FLOAT16,[unk__253,unk__254,unk__255]> ⬅️ ::Mul(%"model_layers_1_self_attn_1_2__104", %"alpha_0__168")
1037 | # n3__168
%"add_9__104"<FLOAT16,[unk__256,128,4096]> ⬅️ ::Add(%"model_layers_0_1_2__1", %"other_1__168")
1038 | # Cast_3__169
%"_to_copy_8__169"<FLOAT,[unk__256,128,4096]> ⬅️ ::Cast(%"add_9__104") {to=1}
1039 | # Constant_4__169
%"_val_2__169"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1040 | # Cast_5__169
%"scalar_tensor_default_3__169"<FLOAT,?> ⬅️ ::Cast(%"_val_2__169") {to=1}
1041 | # n0__170
%"pow_4__169"<FLOAT,[unk__256,128,4096]> ⬅️ ::Pow(%"_to_copy_8__169", %"scalar_tensor_default_3__169")
1042 | # Constant_7__169
%"_val_5__169"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
1043 | # n0__172
%"tmp__172"<INT64,[3]> ⬅️ ::Shape(%"pow_4__169")
1044 | # n1__172
%"tmp_0__172"<INT64,?> ⬅️ ::Size(%"tmp__172")
1045 | # n2__172
%"tmp_1__172"<INT64,?> ⬅️ ::Constant() {value_int=0}
1046 | # n3__172
%"cond__171"<BOOL,?> ⬅️ ::Equal(%"tmp_0__172", %"tmp_1__172")
1047 | # n1__171
%"mean_3__169"<FLOAT,?> ⬅️ ::If(%"cond__171") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__171"<FLOAT,[unk__256,128,4096]>
),
) {
0 | # n0__171_74
%"result__171"<FLOAT,[unk__256,128,4096]> ⬅️ ::Identity(%"pow_4__169")
return %"result__171"<FLOAT,[unk__256,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__171"<FLOAT,?>
),
) {
0 | # n0__173
%"tmp__173"<INT64,[1]> ⬅️ ::Shape(%"_val_5__169")
1 | # n1__173
%"tmp_0__173"<INT64,?> ⬅️ ::Size(%"tmp__173")
2 | # n2__173
%"tmp_1__173"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__173
%"cond_0__171"<BOOL,?> ⬅️ ::Equal(%"tmp_0__173", %"tmp_1__173")
4 | # n1__171_76
%"dim_3__171"<INT64,?> ⬅️ ::If(%"cond_0__171") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__171"<INT64,[1,1]>
),
) {
0 | # n0__171_77
%"int64_0__171"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__171_78
%"dim_1__171"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__169", %"int64_0__171")
return %"dim_1__171"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__171"<INT64,[1]>
),
) {
0 | # n0__171_79
%"dim_2__171"<INT64,[1]> ⬅️ ::Identity(%"_val_5__169")
return %"dim_2__171"<INT64,[1]>
}}
5 | # n2__171
%"result_4__171"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_4__169", %"dim_3__171") {keepdims=1}
return %"result_4__171"<FLOAT,?>
}}
1048 | # Constant_9__169
%"_val_7__169"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
1049 | # n0__174
%"alpha__174"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
1050 | # n1__174
%"alpha_0__174"<FLOAT,?> ⬅️ ::CastLike(%"alpha__174", %"_val_7__169")
1051 | # n2__174
%"other_1__174"<FLOAT,?> ⬅️ ::Mul(%"_val_7__169", %"alpha_0__174")
1052 | # n3__174
%"add_10__169"<FLOAT,?> ⬅️ ::Add(%"mean_3__169", %"other_1__174")
1053 | # n0__175
%"tmp__175"<FLOAT,?> ⬅️ ::Sqrt(%"add_10__169")
1054 | # n1__175
%"rsqrt_3__169"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__175")
1055 | # n0__176
%"mul_16__169"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_8__169", %"rsqrt_3__169")
1056 | # Cast_13__169
%"_to_copy_9__169"<FLOAT16,?> ⬅️ ::Cast(%"mul_16__169") {to=10}
1057 | # n0__177
%"model_layers_1_post_attention_layernorm_1__104"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.1.post_attention_layernorm.weight", %"_to_copy_9__169")
1058 | # n0__181
%"tmp__181"<INT64,[2]> ⬅️ ::Shape(%"model.layers.1.mlp.gate_proj.weight")
1059 | # n1__181
%"rank__180"<INT64,?> ⬅️ ::Size(%"tmp__181")
1060 | # n1__180
%"int64_2__180"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1061 | # n2__180
%"int64_2_cast__180"<INT64,?> ⬅️ ::CastLike(%"int64_2__180", %"rank__180")
1062 | # n3__180
%"cond__180"<BOOL,?> ⬅️ ::Equal(%"rank__180", %"int64_2_cast__180")
1063 | # n4__180
%"t_11__179"<FLOAT16,[unk__257,unk__258]> ⬅️ ::If(%"cond__180") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__180"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__180_80
%"result__180"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.1.mlp.gate_proj.weight") {perm=[1, 0]}
return %"result__180"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__180"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__180_81
%"result_0__180"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.1.mlp.gate_proj.weight")
return %"result_0__180"<FLOAT16,[14336,4096]>
}}
1064 | # Constant_3__179
%"_val_3__179"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1065 | # n0__182
%"size_0__182"<INT64,[2]> ⬅️ ::Cast(%"_val_3__179") {to=7}
1066 | # n1__182
%"view_35__179"<FLOAT16,[unk__259,unk__260]> ⬅️ ::Reshape(%"model_layers_1_post_attention_layernorm_1__104", %"size_0__182")
1067 | # n0__183
%"mm_11__179"<FLOAT16,[unk__259,unk__258]> ⬅️ ::MatMul(%"view_35__179", %"t_11__179")
1068 | # Constant_6__179
%"_val_6__179"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1069 | # n0__184
%"size_0__184"<INT64,[3]> ⬅️ ::Cast(%"_val_6__179") {to=7}
1070 | # n1__184
%"model_layers_1_mlp_gate_proj_1__178"<FLOAT16,[unk__261,unk__262,unk__263]> ⬅️ ::Reshape(%"mm_11__179", %"size_0__184")
1071 | # Cast_0__185
%"_to_copy_10__185"<FLOAT,[unk__261,unk__262,unk__263]> ⬅️ ::Cast(%"model_layers_1_mlp_gate_proj_1__178") {to=1}
1072 | # n0__186
%"sigmoid_1__185"<FLOAT,[unk__261,unk__262,unk__263]> ⬅️ ::Sigmoid(%"_to_copy_10__185")
1073 | # n0__187
%"mul_18__185"<FLOAT,[unk__261,unk__262,unk__263]> ⬅️ ::Mul(%"_to_copy_10__185", %"sigmoid_1__185")
1074 | # Cast_3__185
%"model_layers_1_mlp_act_fn_1__178"<FLOAT16,[unk__261,unk__262,unk__263]> ⬅️ ::Cast(%"mul_18__185") {to=10}
1075 | # n0__190
%"tmp__190"<INT64,[2]> ⬅️ ::Shape(%"model.layers.1.mlp.up_proj.weight")
1076 | # n1__190
%"rank__189"<INT64,?> ⬅️ ::Size(%"tmp__190")
1077 | # n1__189
%"int64_2__189"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1078 | # n2__189
%"int64_2_cast__189"<INT64,?> ⬅️ ::CastLike(%"int64_2__189", %"rank__189")
1079 | # n3__189
%"cond__189"<BOOL,?> ⬅️ ::Equal(%"rank__189", %"int64_2_cast__189")
1080 | # n4__189
%"t_12__188"<FLOAT16,[unk__264,unk__265]> ⬅️ ::If(%"cond__189") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__189"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__189_82
%"result__189"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.1.mlp.up_proj.weight") {perm=[1, 0]}
return %"result__189"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__189"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__189_83
%"result_0__189"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.1.mlp.up_proj.weight")
return %"result_0__189"<FLOAT16,[14336,4096]>
}}
1081 | # Constant_3__188
%"_val_3__188"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1082 | # n0__191
%"size_0__191"<INT64,[2]> ⬅️ ::Cast(%"_val_3__188") {to=7}
1083 | # n1__191
%"view_37__188"<FLOAT16,[unk__266,unk__267]> ⬅️ ::Reshape(%"model_layers_1_post_attention_layernorm_1__104", %"size_0__191")
1084 | # n0__192
%"mm_12__188"<FLOAT16,[unk__266,unk__265]> ⬅️ ::MatMul(%"view_37__188", %"t_12__188")
1085 | # Constant_6__188
%"_val_6__188"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1086 | # n0__193
%"size_0__193"<INT64,[3]> ⬅️ ::Cast(%"_val_6__188") {to=7}
1087 | # n1__193
%"model_layers_1_mlp_up_proj_1__178"<FLOAT16,[unk__268,unk__269,unk__270]> ⬅️ ::Reshape(%"mm_12__188", %"size_0__193")
1088 | # n0__194
%"mul_19__178"<FLOAT16,[unk__271,unk__272,unk__273]> ⬅️ ::Mul(%"model_layers_1_mlp_act_fn_1__178", %"model_layers_1_mlp_up_proj_1__178")
1089 | # n0__197
%"tmp__197"<INT64,[2]> ⬅️ ::Shape(%"model.layers.1.mlp.down_proj.weight")
1090 | # n1__197
%"rank__196"<INT64,?> ⬅️ ::Size(%"tmp__197")
1091 | # n1__196
%"int64_2__196"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1092 | # n2__196
%"int64_2_cast__196"<INT64,?> ⬅️ ::CastLike(%"int64_2__196", %"rank__196")
1093 | # n3__196
%"cond__196"<BOOL,?> ⬅️ ::Equal(%"rank__196", %"int64_2_cast__196")
1094 | # n4__196
%"t_13__195"<FLOAT16,[unk__274,unk__275]> ⬅️ ::If(%"cond__196") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__196"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__196_84
%"result__196"<FLOAT16,[14336,4096]> ⬅️ ::Transpose(%"model.layers.1.mlp.down_proj.weight") {perm=[1, 0]}
return %"result__196"<FLOAT16,[14336,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__196"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__196_85
%"result_0__196"<FLOAT16,[4096,14336]> ⬅️ ::Identity(%"model.layers.1.mlp.down_proj.weight")
return %"result_0__196"<FLOAT16,[4096,14336]>
}}
1095 | # Constant_3__195
%"_val_3__195"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1096 | # n0__198
%"size_0__198"<INT64,[2]> ⬅️ ::Cast(%"_val_3__195") {to=7}
1097 | # n1__198
%"view_39__195"<FLOAT16,[unk__276,unk__277]> ⬅️ ::Reshape(%"mul_19__178", %"size_0__198")
1098 | # n0__199
%"mm_13__195"<FLOAT16,[unk__276,unk__275]> ⬅️ ::MatMul(%"view_39__195", %"t_13__195")
1099 | # Constant_6__195
%"_val_6__195"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1100 | # n0__200
%"size_0__200"<INT64,[3]> ⬅️ ::Cast(%"_val_6__195") {to=7}
1101 | # n1__200
%"model_layers_1_mlp_1__104"<FLOAT16,[unk__278,unk__279,unk__280]> ⬅️ ::Reshape(%"mm_13__195", %"size_0__200")
1102 | # n0__201
%"alpha__201"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
1103 | # n1__201
%"alpha_0__201"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__201", %"model_layers_1_mlp_1__104")
1104 | # n2__201
%"other_1__201"<FLOAT16,[unk__278,unk__279,unk__280]> ⬅️ ::Mul(%"model_layers_1_mlp_1__104", %"alpha_0__201")
1105 | # n3__201
%"model_layers_1_1_2__1"<FLOAT16,[unk__281,128,4096]> ⬅️ ::Add(%"add_9__104", %"other_1__201")
1106 | # Cast_3__203
%"_to_copy_12__203"<FLOAT,[unk__281,128,4096]> ⬅️ ::Cast(%"model_layers_1_1_2__1") {to=1}
1107 | # Constant_4__203
%"_val_2__203"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1108 | # Cast_5__203
%"scalar_tensor_default_4__203"<FLOAT,?> ⬅️ ::Cast(%"_val_2__203") {to=1}
1109 | # n0__204
%"pow_5__203"<FLOAT,[unk__281,128,4096]> ⬅️ ::Pow(%"_to_copy_12__203", %"scalar_tensor_default_4__203")
1110 | # Constant_7__203
%"_val_5__203"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
1111 | # n0__206
%"tmp__206"<INT64,[3]> ⬅️ ::Shape(%"pow_5__203")
1112 | # n1__206
%"tmp_0__206"<INT64,?> ⬅️ ::Size(%"tmp__206")
1113 | # n2__206
%"tmp_1__206"<INT64,?> ⬅️ ::Constant() {value_int=0}
1114 | # n3__206
%"cond__205"<BOOL,?> ⬅️ ::Equal(%"tmp_0__206", %"tmp_1__206")
1115 | # n1__205
%"mean_4__203"<FLOAT,?> ⬅️ ::If(%"cond__205") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__205"<FLOAT,[unk__281,128,4096]>
),
) {
0 | # n0__205_86
%"result__205"<FLOAT,[unk__281,128,4096]> ⬅️ ::Identity(%"pow_5__203")
return %"result__205"<FLOAT,[unk__281,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__205"<FLOAT,?>
),
) {
0 | # n0__207
%"tmp__207"<INT64,[1]> ⬅️ ::Shape(%"_val_5__203")
1 | # n1__207
%"tmp_0__207"<INT64,?> ⬅️ ::Size(%"tmp__207")
2 | # n2__207
%"tmp_1__207"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__207
%"cond_0__205"<BOOL,?> ⬅️ ::Equal(%"tmp_0__207", %"tmp_1__207")
4 | # n1__205_88
%"dim_3__205"<INT64,?> ⬅️ ::If(%"cond_0__205") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__205"<INT64,[1,1]>
),
) {
0 | # n0__205_89
%"int64_0__205"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__205_90
%"dim_1__205"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__203", %"int64_0__205")
return %"dim_1__205"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__205"<INT64,[1]>
),
) {
0 | # n0__205_91
%"dim_2__205"<INT64,[1]> ⬅️ ::Identity(%"_val_5__203")
return %"dim_2__205"<INT64,[1]>
}}
5 | # n2__205
%"result_4__205"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_5__203", %"dim_3__205") {keepdims=1}
return %"result_4__205"<FLOAT,?>
}}
1116 | # Constant_9__203
%"_val_7__203"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
1117 | # n0__208
%"alpha__208"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
1118 | # n1__208
%"alpha_0__208"<FLOAT,?> ⬅️ ::CastLike(%"alpha__208", %"_val_7__203")
1119 | # n2__208
%"other_1__208"<FLOAT,?> ⬅️ ::Mul(%"_val_7__203", %"alpha_0__208")
1120 | # n3__208
%"add_12__203"<FLOAT,?> ⬅️ ::Add(%"mean_4__203", %"other_1__208")
1121 | # n0__209
%"tmp__209"<FLOAT,?> ⬅️ ::Sqrt(%"add_12__203")
1122 | # n1__209
%"rsqrt_4__203"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__209")
1123 | # n0__210
%"mul_20__203"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_12__203", %"rsqrt_4__203")
1124 | # Cast_13__203
%"_to_copy_13__203"<FLOAT16,?> ⬅️ ::Cast(%"mul_20__203") {to=10}
1125 | # n0__211
%"model_layers_2_input_layernorm_1__202"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.2.input_layernorm.weight", %"_to_copy_13__203")
1126 | # n0__215
%"tmp__215"<INT64,[2]> ⬅️ ::Shape(%"model.layers.2.self_attn.q_proj.weight")
1127 | # n1__215
%"rank__214"<INT64,?> ⬅️ ::Size(%"tmp__215")
1128 | # n1__214
%"int64_2__214"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1129 | # n2__214
%"int64_2_cast__214"<INT64,?> ⬅️ ::CastLike(%"int64_2__214", %"rank__214")
1130 | # n3__214
%"cond__214"<BOOL,?> ⬅️ ::Equal(%"rank__214", %"int64_2_cast__214")
1131 | # n4__214
%"t_14__213"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__214") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__214"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__214_92
%"result__214"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.2.self_attn.q_proj.weight") {perm=[1, 0]}
return %"result__214"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__214"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__214_93
%"result_0__214"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.2.self_attn.q_proj.weight")
return %"result_0__214"<FLOAT16,[4096,4096]>
}}
1132 | # Constant_3__213
%"_val_3__213"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1133 | # n0__216
%"size_0__216"<INT64,[2]> ⬅️ ::Cast(%"_val_3__213") {to=7}
1134 | # n1__216
%"view_41__213"<FLOAT16,[unk__282,unk__283]> ⬅️ ::Reshape(%"model_layers_2_input_layernorm_1__202", %"size_0__216")
1135 | # n0__217
%"mm_14__213"<FLOAT16,[unk__282,4096]> ⬅️ ::MatMul(%"view_41__213", %"t_14__213")
1136 | # Constant_6__213
%"_val_6__213"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1137 | # n0__218
%"size_0__218"<INT64,[3]> ⬅️ ::Cast(%"_val_6__213") {to=7}
1138 | # n1__218
%"model_layers_2_self_attn_q_proj_1__212"<FLOAT16,[unk__284,unk__285,unk__286]> ⬅️ ::Reshape(%"mm_14__213", %"size_0__218")
1139 | # n0__221
%"tmp__221"<INT64,[2]> ⬅️ ::Shape(%"model.layers.2.self_attn.k_proj.weight")
1140 | # n1__221
%"rank__220"<INT64,?> ⬅️ ::Size(%"tmp__221")
1141 | # n1__220
%"int64_2__220"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1142 | # n2__220
%"int64_2_cast__220"<INT64,?> ⬅️ ::CastLike(%"int64_2__220", %"rank__220")
1143 | # n3__220
%"cond__220"<BOOL,?> ⬅️ ::Equal(%"rank__220", %"int64_2_cast__220")
1144 | # n4__220
%"t_15__219"<FLOAT16,[unk__287,unk__288]> ⬅️ ::If(%"cond__220") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__220"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__220_94
%"result__220"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.2.self_attn.k_proj.weight") {perm=[1, 0]}
return %"result__220"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__220"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__220_95
%"result_0__220"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.2.self_attn.k_proj.weight")
return %"result_0__220"<FLOAT16,[1024,4096]>
}}
1145 | # Constant_3__219
%"_val_3__219"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1146 | # n0__222
%"size_0__222"<INT64,[2]> ⬅️ ::Cast(%"_val_3__219") {to=7}
1147 | # n1__222
%"view_43__219"<FLOAT16,[unk__289,unk__290]> ⬅️ ::Reshape(%"model_layers_2_input_layernorm_1__202", %"size_0__222")
1148 | # n0__223
%"mm_15__219"<FLOAT16,[unk__289,unk__288]> ⬅️ ::MatMul(%"view_43__219", %"t_15__219")
1149 | # Constant_6__219
%"_val_6__219"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1150 | # n0__224
%"size_0__224"<INT64,[3]> ⬅️ ::Cast(%"_val_6__219") {to=7}
1151 | # n1__224
%"model_layers_2_self_attn_k_proj_1__212"<FLOAT16,[unk__291,unk__292,unk__293]> ⬅️ ::Reshape(%"mm_15__219", %"size_0__224")
1152 | # n0__227
%"tmp__227"<INT64,[2]> ⬅️ ::Shape(%"model.layers.2.self_attn.v_proj.weight")
1153 | # n1__227
%"rank__226"<INT64,?> ⬅️ ::Size(%"tmp__227")
1154 | # n1__226
%"int64_2__226"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1155 | # n2__226
%"int64_2_cast__226"<INT64,?> ⬅️ ::CastLike(%"int64_2__226", %"rank__226")
1156 | # n3__226
%"cond__226"<BOOL,?> ⬅️ ::Equal(%"rank__226", %"int64_2_cast__226")
1157 | # n4__226
%"t_16__225"<FLOAT16,[unk__294,unk__295]> ⬅️ ::If(%"cond__226") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__226"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__226_96
%"result__226"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.2.self_attn.v_proj.weight") {perm=[1, 0]}
return %"result__226"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__226"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__226_97
%"result_0__226"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.2.self_attn.v_proj.weight")
return %"result_0__226"<FLOAT16,[1024,4096]>
}}
1158 | # Constant_3__225
%"_val_3__225"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1159 | # n0__228
%"size_0__228"<INT64,[2]> ⬅️ ::Cast(%"_val_3__225") {to=7}
1160 | # n1__228
%"view_45__225"<FLOAT16,[unk__296,unk__297]> ⬅️ ::Reshape(%"model_layers_2_input_layernorm_1__202", %"size_0__228")
1161 | # n0__229
%"mm_16__225"<FLOAT16,[unk__296,unk__295]> ⬅️ ::MatMul(%"view_45__225", %"t_16__225")
1162 | # Constant_6__225
%"_val_6__225"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1163 | # n0__230
%"size_0__230"<INT64,[3]> ⬅️ ::Cast(%"_val_6__225") {to=7}
1164 | # n1__230
%"model_layers_2_self_attn_v_proj_1__212"<FLOAT16,[unk__298,unk__299,unk__300]> ⬅️ ::Reshape(%"mm_16__225", %"size_0__230")
1165 | # Constant_61__212
%"_val_8__212"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
1166 | # n0__231
%"size_0__231"<INT64,[4]> ⬅️ ::Cast(%"_val_8__212") {to=7}
1167 | # n1__231
%"view_47__212"<FLOAT16,[unk__301,unk__302,unk__303,unk__304]> ⬅️ ::Reshape(%"model_layers_2_self_attn_q_proj_1__212", %"size_0__231")
1168 | # Transpose_63__212
%"transpose_8__212"<FLOAT16,[unk__301,unk__303,unk__302,unk__304]> ⬅️ ::Transpose(%"view_47__212") {perm=[0, 2, 1, 3]}
1169 | # Constant_64__212
%"_val_11__212"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
1170 | # n0__232
%"size_0__232"<INT64,[4]> ⬅️ ::Cast(%"_val_11__212") {to=7}
1171 | # n1__232
%"view_48__212"<FLOAT16,[unk__305,unk__306,unk__307,unk__308]> ⬅️ ::Reshape(%"model_layers_2_self_attn_k_proj_1__212", %"size_0__232")
1172 | # Transpose_66__212
%"transpose_9__212"<FLOAT16,[unk__305,unk__307,unk__306,unk__308]> ⬅️ ::Transpose(%"view_48__212") {perm=[0, 2, 1, 3]}
1173 | # Constant_67__212
%"_val_14__212"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
1174 | # n0__233
%"size_0__233"<INT64,[4]> ⬅️ ::Cast(%"_val_14__212") {to=7}
1175 | # n1__233
%"view_49__212"<FLOAT16,[unk__309,unk__310,unk__311,unk__312]> ⬅️ ::Reshape(%"model_layers_2_self_attn_v_proj_1__212", %"size_0__233")
1176 | # Transpose_69__212
%"model_1_4"<FLOAT16,[1,8,128,128]> ⬅️ ::Transpose(%"view_49__212") {perm=[0, 2, 1, 3]}
1177 | # Constant_8__234
%"_val_1__234"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1178 | # Cast_9__234
%"_val_2__234"<INT64,?> ⬅️ ::Cast(%"_val_1__234") {to=7}
1179 | # Constant_10__234
%"_val_3__234"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1180 | # Reshape_11__234
%"_val_4__234"<INT64,[1]> ⬅️ ::Reshape(%"_val_2__234", %"_val_3__234") {allowzero=0}
1181 | # Constant_12__234
%"_val_5__234"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1182 | # Cast_13__234
%"_val_6__234"<INT64,?> ⬅️ ::Cast(%"_val_5__234") {to=7}
1183 | # Constant_14__234
%"_val_7__234"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1184 | # Reshape_15__234
%"_val_8__234"<INT64,[1]> ⬅️ ::Reshape(%"_val_6__234", %"_val_7__234") {allowzero=0}
1185 | # Constant_16__234
%"_val_9__234"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1186 | # Cast_17__234
%"_val_10__234"<INT64,?> ⬅️ ::Cast(%"_val_9__234") {to=7}
1187 | # Constant_18__234
%"_val_11__234"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1188 | # Reshape_19__234
%"_val_12__234"<INT64,[1]> ⬅️ ::Reshape(%"_val_10__234", %"_val_11__234") {allowzero=0}
1189 | # Constant_20__234
%"_val_13__234"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1190 | # Cast_21__234
%"_val_14__234"<INT64,?> ⬅️ ::Cast(%"_val_13__234") {to=7}
1191 | # Constant_22__234
%"_val_15__234"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1192 | # Reshape_23__234
%"_val_16__234"<INT64,[1]> ⬅️ ::Reshape(%"_val_14__234", %"_val_15__234") {allowzero=0}
1193 | # Slice_24__234
%"model_layers_2_self_attn_rotary_emb_1__212"<FLOAT16,[unk__313,unk__314]> ⬅️ ::Slice(%"model.layers.2.self_attn.rotary_emb.cos_cached", %"_val_4__234", %"_val_8__234", %"_val_12__234", %"_val_16__234")
1194 | # Constant_25__234
%"_val_19__234"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1195 | # Cast_26__234
%"_val_20__234"<INT64,?> ⬅️ ::Cast(%"_val_19__234") {to=7}
1196 | # Constant_27__234
%"_val_21__234"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1197 | # Reshape_28__234
%"_val_22__234"<INT64,[1]> ⬅️ ::Reshape(%"_val_20__234", %"_val_21__234") {allowzero=0}
1198 | # Constant_29__234
%"_val_23__234"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1199 | # Cast_30__234
%"_val_24__234"<INT64,?> ⬅️ ::Cast(%"_val_23__234") {to=7}
1200 | # Constant_31__234
%"_val_25__234"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1201 | # Reshape_32__234
%"_val_26__234"<INT64,[1]> ⬅️ ::Reshape(%"_val_24__234", %"_val_25__234") {allowzero=0}
1202 | # Constant_33__234
%"_val_27__234"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1203 | # Cast_34__234
%"_val_28__234"<INT64,?> ⬅️ ::Cast(%"_val_27__234") {to=7}
1204 | # Constant_35__234
%"_val_29__234"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1205 | # Reshape_36__234
%"_val_30__234"<INT64,[1]> ⬅️ ::Reshape(%"_val_28__234", %"_val_29__234") {allowzero=0}
1206 | # Constant_37__234
%"_val_31__234"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1207 | # Cast_38__234
%"_val_32__234"<INT64,?> ⬅️ ::Cast(%"_val_31__234") {to=7}
1208 | # Constant_39__234
%"_val_33__234"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1209 | # Reshape_40__234
%"_val_34__234"<INT64,[1]> ⬅️ ::Reshape(%"_val_32__234", %"_val_33__234") {allowzero=0}
1210 | # Slice_41__234
%"model_layers_2_self_attn_rotary_emb_1_1__212"<FLOAT16,[unk__315,unk__316]> ⬅️ ::Slice(%"model.layers.2.self_attn.rotary_emb.sin_cached", %"_val_22__234", %"_val_26__234", %"_val_30__234", %"_val_34__234")
1211 | # Transpose_71__212
%"_val_21__212"<FLOAT16,[unk__313,unk__314]> ⬅️ ::Transpose(%"model_layers_2_self_attn_rotary_emb_1__212") {perm=[0, 1]}
1212 | # Max_72__212
%"_val_22__212"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
1213 | # Shape_73__212
%"_val_23__212"<INT64,[2]> ⬅️ ::Shape(%"_val_22__212") {start=0}
1214 | # Expand_74__212
%"_val_24__212"<INT64,[unk__317,unk__318]> ⬅️ ::Expand(%"view__1", %"_val_23__212")
1215 | # Constant_75__212
%"_val_25__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1216 | # Unsqueeze_76__212
%"_val_26__212"<INT64,[unk__317,unk__318,1]> ⬅️ ::Unsqueeze(%"_val_24__212", %"_val_25__212")
1217 | # Concat_77__212
%"_val_27__212"<INT64,[unk__317,unk__318,1]> ⬅️ ::Concat(%"_val_26__212") {axis=-1}
1218 | # GatherND_78__212
%"_val_28__212"<FLOAT16,[unk__317,unk__318,unk__314]> ⬅️ ::GatherND(%"_val_21__212", %"_val_27__212") {batch_dims=0}
1219 | # Transpose_79__212
%"index_4__212"<FLOAT16,[unk__317,unk__318,unk__314]> ⬅️ ::Transpose(%"_val_28__212") {perm=[0, 1, 2]}
1220 | # n0__235
%"dim__235"<INT64,?> ⬅️ ::Constant() {value_int=1}
1221 | # n1__235
%"dim_0__235"<INT64,?> ⬅️ ::Cast(%"dim__235") {to=7}
1222 | # n2__235
%"unsqueeze_9__212"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_4__212", %"dim_0__235")
1223 | # Transpose_81__212
%"_val_31__212"<FLOAT16,[unk__315,unk__316]> ⬅️ ::Transpose(%"model_layers_2_self_attn_rotary_emb_1_1__212") {perm=[0, 1]}
1224 | # Max_82__212
%"_val_32__212"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
1225 | # Shape_83__212
%"_val_33__212"<INT64,[2]> ⬅️ ::Shape(%"_val_32__212") {start=0}
1226 | # Expand_84__212
%"_val_34__212"<INT64,[unk__319,unk__320]> ⬅️ ::Expand(%"view__1", %"_val_33__212")
1227 | # Constant_85__212
%"_val_35__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1228 | # Unsqueeze_86__212
%"_val_36__212"<INT64,[unk__319,unk__320,1]> ⬅️ ::Unsqueeze(%"_val_34__212", %"_val_35__212")
1229 | # Concat_87__212
%"_val_37__212"<INT64,[unk__319,unk__320,1]> ⬅️ ::Concat(%"_val_36__212") {axis=-1}
1230 | # GatherND_88__212
%"_val_38__212"<FLOAT16,[unk__319,unk__320,unk__316]> ⬅️ ::GatherND(%"_val_31__212", %"_val_37__212") {batch_dims=0}
1231 | # Transpose_89__212
%"index_5__212"<FLOAT16,[unk__319,unk__320,unk__316]> ⬅️ ::Transpose(%"_val_38__212") {perm=[0, 1, 2]}
1232 | # n0__236
%"dim__236"<INT64,?> ⬅️ ::Constant() {value_int=1}
1233 | # n1__236
%"dim_0__236"<INT64,?> ⬅️ ::Cast(%"dim__236") {to=7}
1234 | # n2__236
%"unsqueeze_10__212"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_5__212", %"dim_0__236")
1235 | # n0__237
%"mul_22__212"<FLOAT16,?> ⬅️ ::Mul(%"transpose_8__212", %"unsqueeze_9__212")
1236 | # Constant_92__212
%"_val_42__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1237 | # Cast_93__212
%"_val_43__212"<INT64,?> ⬅️ ::Cast(%"_val_42__212") {to=7}
1238 | # Constant_94__212
%"_val_44__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1239 | # Reshape_95__212
%"_val_45__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_43__212", %"_val_44__212") {allowzero=0}
1240 | # Constant_96__212
%"_val_46__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1241 | # Cast_97__212
%"_val_47__212"<INT64,?> ⬅️ ::Cast(%"_val_46__212") {to=7}
1242 | # Constant_98__212
%"_val_48__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1243 | # Reshape_99__212
%"_val_49__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_47__212", %"_val_48__212") {allowzero=0}
1244 | # Constant_100__212
%"_val_50__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1245 | # Cast_101__212
%"_val_51__212"<INT64,?> ⬅️ ::Cast(%"_val_50__212") {to=7}
1246 | # Constant_102__212
%"_val_52__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1247 | # Reshape_103__212
%"_val_53__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_51__212", %"_val_52__212") {allowzero=0}
1248 | # Constant_104__212
%"_val_54__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1249 | # Cast_105__212
%"_val_55__212"<INT64,?> ⬅️ ::Cast(%"_val_54__212") {to=7}
1250 | # Constant_106__212
%"_val_56__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1251 | # Reshape_107__212
%"_val_57__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_55__212", %"_val_56__212") {allowzero=0}
1252 | # Slice_108__212
%"slice_31__212"<FLOAT16,[unk__321,unk__322,unk__323,unk__324]> ⬅️ ::Slice(%"transpose_8__212", %"_val_45__212", %"_val_49__212", %"_val_53__212", %"_val_57__212")
1253 | # Constant_109__212
%"_val_59__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1254 | # Cast_110__212
%"_val_60__212"<INT64,?> ⬅️ ::Cast(%"_val_59__212") {to=7}
1255 | # Constant_111__212
%"_val_61__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1256 | # Reshape_112__212
%"_val_62__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_60__212", %"_val_61__212") {allowzero=0}
1257 | # Constant_113__212
%"_val_63__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1258 | # Cast_114__212
%"_val_64__212"<INT64,?> ⬅️ ::Cast(%"_val_63__212") {to=7}
1259 | # Constant_115__212
%"_val_65__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1260 | # Reshape_116__212
%"_val_66__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_64__212", %"_val_65__212") {allowzero=0}
1261 | # Constant_117__212
%"_val_67__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1262 | # Cast_118__212
%"_val_68__212"<INT64,?> ⬅️ ::Cast(%"_val_67__212") {to=7}
1263 | # Constant_119__212
%"_val_69__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1264 | # Reshape_120__212
%"_val_70__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_68__212", %"_val_69__212") {allowzero=0}
1265 | # Constant_121__212
%"_val_71__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1266 | # Cast_122__212
%"_val_72__212"<INT64,?> ⬅️ ::Cast(%"_val_71__212") {to=7}
1267 | # Constant_123__212
%"_val_73__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1268 | # Reshape_124__212
%"_val_74__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_72__212", %"_val_73__212") {allowzero=0}
1269 | # Slice_125__212
%"slice_32__212"<FLOAT16,[unk__325,unk__326,unk__327,unk__328]> ⬅️ ::Slice(%"transpose_8__212", %"_val_62__212", %"_val_66__212", %"_val_70__212", %"_val_74__212")
1270 | # n0__238
%"neg_4__212"<FLOAT16,[unk__325,unk__326,unk__327,unk__328]> ⬅️ ::Neg(%"slice_32__212")
1271 | # SequenceConstruct_127__212
%"77__212"<Sequence(Tensor(FLOAT16)),[unk__329,unk__330,unk__331,unk__332]> ⬅️ ::SequenceConstruct(%"neg_4__212", %"slice_31__212")
1272 | # n0__239
%"cat_4__212"<FLOAT16,[unk__329,unk__330,unk__331,unk__333]> ⬅️ ::ConcatFromSequence(%"77__212") {axis=-1}
1273 | # n0__240
%"mul_23__212"<FLOAT16,?> ⬅️ ::Mul(%"cat_4__212", %"unsqueeze_10__212")
1274 | # n0__241
%"alpha__241"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
1275 | # n1__241
%"alpha_0__241"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__241", %"mul_23__212")
1276 | # n2__241
%"other_1__241"<FLOAT16,?> ⬅️ ::Mul(%"mul_23__212", %"alpha_0__241")
1277 | # n3__241
%"add_13__212"<FLOAT16,?> ⬅️ ::Add(%"mul_22__212", %"other_1__241")
1278 | # n0__242
%"mul_24__212"<FLOAT16,?> ⬅️ ::Mul(%"transpose_9__212", %"unsqueeze_9__212")
1279 | # Constant_132__212
%"_val_82__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1280 | # Cast_133__212
%"_val_83__212"<INT64,?> ⬅️ ::Cast(%"_val_82__212") {to=7}
1281 | # Constant_134__212
%"_val_84__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1282 | # Reshape_135__212
%"_val_85__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_83__212", %"_val_84__212") {allowzero=0}
1283 | # Constant_136__212
%"_val_86__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1284 | # Cast_137__212
%"_val_87__212"<INT64,?> ⬅️ ::Cast(%"_val_86__212") {to=7}
1285 | # Constant_138__212
%"_val_88__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1286 | # Reshape_139__212
%"_val_89__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_87__212", %"_val_88__212") {allowzero=0}
1287 | # Constant_140__212
%"_val_90__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1288 | # Cast_141__212
%"_val_91__212"<INT64,?> ⬅️ ::Cast(%"_val_90__212") {to=7}
1289 | # Constant_142__212
%"_val_92__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1290 | # Reshape_143__212
%"_val_93__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_91__212", %"_val_92__212") {allowzero=0}
1291 | # Constant_144__212
%"_val_94__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1292 | # Cast_145__212
%"_val_95__212"<INT64,?> ⬅️ ::Cast(%"_val_94__212") {to=7}
1293 | # Constant_146__212
%"_val_96__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1294 | # Reshape_147__212
%"_val_97__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_95__212", %"_val_96__212") {allowzero=0}
1295 | # Slice_148__212
%"slice_33__212"<FLOAT16,[unk__334,unk__335,unk__336,unk__337]> ⬅️ ::Slice(%"transpose_9__212", %"_val_85__212", %"_val_89__212", %"_val_93__212", %"_val_97__212")
1296 | # Constant_149__212
%"_val_99__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1297 | # Cast_150__212
%"_val_100__212"<INT64,?> ⬅️ ::Cast(%"_val_99__212") {to=7}
1298 | # Constant_151__212
%"_val_101__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1299 | # Reshape_152__212
%"_val_102__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_100__212", %"_val_101__212") {allowzero=0}
1300 | # Constant_153__212
%"_val_103__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1301 | # Cast_154__212
%"_val_104__212"<INT64,?> ⬅️ ::Cast(%"_val_103__212") {to=7}
1302 | # Constant_155__212
%"_val_105__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1303 | # Reshape_156__212
%"_val_106__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_104__212", %"_val_105__212") {allowzero=0}
1304 | # Constant_157__212
%"_val_107__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1305 | # Cast_158__212
%"_val_108__212"<INT64,?> ⬅️ ::Cast(%"_val_107__212") {to=7}
1306 | # Constant_159__212
%"_val_109__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1307 | # Reshape_160__212
%"_val_110__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_108__212", %"_val_109__212") {allowzero=0}
1308 | # Constant_161__212
%"_val_111__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1309 | # Cast_162__212
%"_val_112__212"<INT64,?> ⬅️ ::Cast(%"_val_111__212") {to=7}
1310 | # Constant_163__212
%"_val_113__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1311 | # Reshape_164__212
%"_val_114__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_112__212", %"_val_113__212") {allowzero=0}
1312 | # Slice_165__212
%"slice_34__212"<FLOAT16,[unk__338,unk__339,unk__340,unk__341]> ⬅️ ::Slice(%"transpose_9__212", %"_val_102__212", %"_val_106__212", %"_val_110__212", %"_val_114__212")
1313 | # n0__243
%"neg_5__212"<FLOAT16,[unk__338,unk__339,unk__340,unk__341]> ⬅️ ::Neg(%"slice_34__212")
1314 | # SequenceConstruct_167__212
%"117__212"<Sequence(Tensor(FLOAT16)),[unk__342,unk__343,unk__344,unk__345]> ⬅️ ::SequenceConstruct(%"neg_5__212", %"slice_33__212")
1315 | # n0__244
%"cat_5__212"<FLOAT16,[unk__342,unk__343,unk__344,unk__346]> ⬅️ ::ConcatFromSequence(%"117__212") {axis=-1}
1316 | # n0__245
%"mul_25__212"<FLOAT16,?> ⬅️ ::Mul(%"cat_5__212", %"unsqueeze_10__212")
1317 | # n0__246
%"alpha__246"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
1318 | # n1__246
%"alpha_0__246"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__246", %"mul_25__212")
1319 | # n2__246
%"other_1__246"<FLOAT16,?> ⬅️ ::Mul(%"mul_25__212", %"alpha_0__246")
1320 | # n3__246
%"model_1_5"<FLOAT16,[1,8,128,128]> ⬅️ ::Add(%"mul_24__212", %"other_1__246")
1321 | # Constant_171__212
%"_val_121__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1322 | # Cast_172__212
%"_val_122__212"<INT64,?> ⬅️ ::Cast(%"_val_121__212") {to=7}
1323 | # Constant_173__212
%"_val_123__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1324 | # Reshape_174__212
%"_val_124__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_122__212", %"_val_123__212") {allowzero=0}
1325 | # Constant_175__212
%"_val_125__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1326 | # Cast_176__212
%"_val_126__212"<INT64,?> ⬅️ ::Cast(%"_val_125__212") {to=7}
1327 | # Constant_177__212
%"_val_127__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1328 | # Reshape_178__212
%"_val_128__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_126__212", %"_val_127__212") {allowzero=0}
1329 | # Constant_179__212
%"_val_129__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1330 | # Cast_180__212
%"_val_130__212"<INT64,?> ⬅️ ::Cast(%"_val_129__212") {to=7}
1331 | # Constant_181__212
%"_val_131__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1332 | # Reshape_182__212
%"_val_132__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_130__212", %"_val_131__212") {allowzero=0}
1333 | # Constant_183__212
%"_val_133__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1334 | # Cast_184__212
%"_val_134__212"<INT64,?> ⬅️ ::Cast(%"_val_133__212") {to=7}
1335 | # Constant_185__212
%"_val_135__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1336 | # Reshape_186__212
%"_val_136__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_134__212", %"_val_135__212") {allowzero=0}
1337 | # Slice_187__212
%"slice_35__212"<FLOAT16,[unk__347,unk__348,unk__349,unk__350]> ⬅️ ::Slice(%"model_1_5", %"_val_124__212", %"_val_128__212", %"_val_132__212", %"_val_136__212")
1338 | # Constant_188__212
%"_val_138__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1339 | # Cast_189__212
%"_val_139__212"<INT64,?> ⬅️ ::Cast(%"_val_138__212") {to=7}
1340 | # Constant_190__212
%"_val_140__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1341 | # Reshape_191__212
%"_val_141__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_139__212", %"_val_140__212") {allowzero=0}
1342 | # Constant_192__212
%"_val_142__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1343 | # Cast_193__212
%"_val_143__212"<INT64,?> ⬅️ ::Cast(%"_val_142__212") {to=7}
1344 | # Constant_194__212
%"_val_144__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1345 | # Reshape_195__212
%"_val_145__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_143__212", %"_val_144__212") {allowzero=0}
1346 | # Constant_196__212
%"_val_146__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1347 | # Cast_197__212
%"_val_147__212"<INT64,?> ⬅️ ::Cast(%"_val_146__212") {to=7}
1348 | # Constant_198__212
%"_val_148__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1349 | # Reshape_199__212
%"_val_149__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_147__212", %"_val_148__212") {allowzero=0}
1350 | # Constant_200__212
%"_val_150__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1351 | # Cast_201__212
%"_val_151__212"<INT64,?> ⬅️ ::Cast(%"_val_150__212") {to=7}
1352 | # Constant_202__212
%"_val_152__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1353 | # Reshape_203__212
%"_val_153__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_151__212", %"_val_152__212") {allowzero=0}
1354 | # Slice_204__212
%"slice_36__212"<FLOAT16,[unk__351,unk__352,unk__353,unk__354]> ⬅️ ::Slice(%"slice_35__212", %"_val_141__212", %"_val_145__212", %"_val_149__212", %"_val_153__212")
1355 | # n0__247
%"dim__247"<INT64,?> ⬅️ ::Constant() {value_int=2}
1356 | # n1__247
%"dim_0__247"<INT64,?> ⬅️ ::Cast(%"dim__247") {to=7}
1357 | # n2__247
%"unsqueeze_11__212"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_36__212", %"dim_0__247")
1358 | # Constant_206__212
%"_val_156__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1359 | # Cast_207__212
%"_val_157__212"<INT64,?> ⬅️ ::Cast(%"_val_156__212") {to=7}
1360 | # Constant_208__212
%"_val_158__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1361 | # Reshape_209__212
%"_val_159__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_157__212", %"_val_158__212") {allowzero=0}
1362 | # Constant_210__212
%"_val_160__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1363 | # Cast_211__212
%"_val_161__212"<INT64,?> ⬅️ ::Cast(%"_val_160__212") {to=7}
1364 | # Constant_212__212
%"_val_162__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1365 | # Reshape_213__212
%"_val_163__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_161__212", %"_val_162__212") {allowzero=0}
1366 | # Constant_214__212
%"_val_164__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1367 | # Cast_215__212
%"_val_165__212"<INT64,?> ⬅️ ::Cast(%"_val_164__212") {to=7}
1368 | # Constant_216__212
%"_val_166__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1369 | # Reshape_217__212
%"_val_167__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_165__212", %"_val_166__212") {allowzero=0}
1370 | # Constant_218__212
%"_val_168__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1371 | # Cast_219__212
%"_val_169__212"<INT64,?> ⬅️ ::Cast(%"_val_168__212") {to=7}
1372 | # Constant_220__212
%"_val_170__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1373 | # Reshape_221__212
%"_val_171__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_169__212", %"_val_170__212") {allowzero=0}
1374 | # Slice_222__212
%"slice_37__212"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_11__212", %"_val_159__212", %"_val_163__212", %"_val_167__212", %"_val_171__212")
1375 | # Constant_223__212
%"_val_173__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1376 | # Cast_224__212
%"_val_174__212"<INT64,?> ⬅️ ::Cast(%"_val_173__212") {to=7}
1377 | # Constant_225__212
%"_val_175__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1378 | # Reshape_226__212
%"_val_176__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_174__212", %"_val_175__212") {allowzero=0}
1379 | # Constant_227__212
%"_val_177__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1380 | # Cast_228__212
%"_val_178__212"<INT64,?> ⬅️ ::Cast(%"_val_177__212") {to=7}
1381 | # Constant_229__212
%"_val_179__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1382 | # Reshape_230__212
%"_val_180__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_178__212", %"_val_179__212") {allowzero=0}
1383 | # Constant_231__212
%"_val_181__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1384 | # Cast_232__212
%"_val_182__212"<INT64,?> ⬅️ ::Cast(%"_val_181__212") {to=7}
1385 | # Constant_233__212
%"_val_183__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1386 | # Reshape_234__212
%"_val_184__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_182__212", %"_val_183__212") {allowzero=0}
1387 | # Constant_235__212
%"_val_185__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1388 | # Cast_236__212
%"_val_186__212"<INT64,?> ⬅️ ::Cast(%"_val_185__212") {to=7}
1389 | # Constant_237__212
%"_val_187__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1390 | # Reshape_238__212
%"_val_188__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_186__212", %"_val_187__212") {allowzero=0}
1391 | # Slice_239__212
%"slice_38__212"<FLOAT16,?> ⬅️ ::Slice(%"slice_37__212", %"_val_176__212", %"_val_180__212", %"_val_184__212", %"_val_188__212")
1392 | # Constant_240__212
%"_val_190__212"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
1393 | # n0__248
%"size_0__248"<INT64,[5]> ⬅️ ::Cast(%"_val_190__212") {to=7}
1394 | # n1__248
%"size_1__248"<INT64,[5]> ⬅️ ::Abs(%"size_0__248")
1395 | # n2__248
%"expand_4__212"<FLOAT16,?> ⬅️ ::Expand(%"slice_38__212", %"size_1__248")
1396 | # n0__249
%"clone_4__212"<FLOAT16,?> ⬅️ ::Identity(%"expand_4__212")
1397 | # Constant_243__212
%"_val_193__212"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
1398 | # n0__250
%"size_0__250"<INT64,[4]> ⬅️ ::Cast(%"_val_193__212") {to=7}
1399 | # n1__250
%"view_50__212"<FLOAT16,[unk__355,unk__356,unk__357,unk__358]> ⬅️ ::Reshape(%"clone_4__212", %"size_0__250")
1400 | # Constant_245__212
%"_val_195__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1401 | # Cast_246__212
%"_val_196__212"<INT64,?> ⬅️ ::Cast(%"_val_195__212") {to=7}
1402 | # Constant_247__212
%"_val_197__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1403 | # Reshape_248__212
%"_val_198__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_196__212", %"_val_197__212") {allowzero=0}
1404 | # Constant_249__212
%"_val_199__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1405 | # Cast_250__212
%"_val_200__212"<INT64,?> ⬅️ ::Cast(%"_val_199__212") {to=7}
1406 | # Constant_251__212
%"_val_201__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1407 | # Reshape_252__212
%"_val_202__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_200__212", %"_val_201__212") {allowzero=0}
1408 | # Constant_253__212
%"_val_203__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1409 | # Cast_254__212
%"_val_204__212"<INT64,?> ⬅️ ::Cast(%"_val_203__212") {to=7}
1410 | # Constant_255__212
%"_val_205__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1411 | # Reshape_256__212
%"_val_206__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_204__212", %"_val_205__212") {allowzero=0}
1412 | # Constant_257__212
%"_val_207__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1413 | # Cast_258__212
%"_val_208__212"<INT64,?> ⬅️ ::Cast(%"_val_207__212") {to=7}
1414 | # Constant_259__212
%"_val_209__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1415 | # Reshape_260__212
%"_val_210__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_208__212", %"_val_209__212") {allowzero=0}
1416 | # Slice_261__212
%"slice_39__212"<FLOAT16,[unk__359,unk__360,unk__361,unk__362]> ⬅️ ::Slice(%"model_1_4", %"_val_198__212", %"_val_202__212", %"_val_206__212", %"_val_210__212")
1417 | # Constant_262__212
%"_val_212__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1418 | # Cast_263__212
%"_val_213__212"<INT64,?> ⬅️ ::Cast(%"_val_212__212") {to=7}
1419 | # Constant_264__212
%"_val_214__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1420 | # Reshape_265__212
%"_val_215__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_213__212", %"_val_214__212") {allowzero=0}
1421 | # Constant_266__212
%"_val_216__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1422 | # Cast_267__212
%"_val_217__212"<INT64,?> ⬅️ ::Cast(%"_val_216__212") {to=7}
1423 | # Constant_268__212
%"_val_218__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1424 | # Reshape_269__212
%"_val_219__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_217__212", %"_val_218__212") {allowzero=0}
1425 | # Constant_270__212
%"_val_220__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1426 | # Cast_271__212
%"_val_221__212"<INT64,?> ⬅️ ::Cast(%"_val_220__212") {to=7}
1427 | # Constant_272__212
%"_val_222__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1428 | # Reshape_273__212
%"_val_223__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_221__212", %"_val_222__212") {allowzero=0}
1429 | # Constant_274__212
%"_val_224__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1430 | # Cast_275__212
%"_val_225__212"<INT64,?> ⬅️ ::Cast(%"_val_224__212") {to=7}
1431 | # Constant_276__212
%"_val_226__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1432 | # Reshape_277__212
%"_val_227__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_225__212", %"_val_226__212") {allowzero=0}
1433 | # Slice_278__212
%"slice_40__212"<FLOAT16,[unk__363,unk__364,unk__365,unk__366]> ⬅️ ::Slice(%"slice_39__212", %"_val_215__212", %"_val_219__212", %"_val_223__212", %"_val_227__212")
1434 | # n0__251
%"dim__251"<INT64,?> ⬅️ ::Constant() {value_int=2}
1435 | # n1__251
%"dim_0__251"<INT64,?> ⬅️ ::Cast(%"dim__251") {to=7}
1436 | # n2__251
%"unsqueeze_12__212"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_40__212", %"dim_0__251")
1437 | # Constant_280__212
%"_val_230__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1438 | # Cast_281__212
%"_val_231__212"<INT64,?> ⬅️ ::Cast(%"_val_230__212") {to=7}
1439 | # Constant_282__212
%"_val_232__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1440 | # Reshape_283__212
%"_val_233__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_231__212", %"_val_232__212") {allowzero=0}
1441 | # Constant_284__212
%"_val_234__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1442 | # Cast_285__212
%"_val_235__212"<INT64,?> ⬅️ ::Cast(%"_val_234__212") {to=7}
1443 | # Constant_286__212
%"_val_236__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1444 | # Reshape_287__212
%"_val_237__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_235__212", %"_val_236__212") {allowzero=0}
1445 | # Constant_288__212
%"_val_238__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1446 | # Cast_289__212
%"_val_239__212"<INT64,?> ⬅️ ::Cast(%"_val_238__212") {to=7}
1447 | # Constant_290__212
%"_val_240__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1448 | # Reshape_291__212
%"_val_241__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_239__212", %"_val_240__212") {allowzero=0}
1449 | # Constant_292__212
%"_val_242__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1450 | # Cast_293__212
%"_val_243__212"<INT64,?> ⬅️ ::Cast(%"_val_242__212") {to=7}
1451 | # Constant_294__212
%"_val_244__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1452 | # Reshape_295__212
%"_val_245__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_243__212", %"_val_244__212") {allowzero=0}
1453 | # Slice_296__212
%"slice_41__212"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_12__212", %"_val_233__212", %"_val_237__212", %"_val_241__212", %"_val_245__212")
1454 | # Constant_297__212
%"_val_247__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1455 | # Cast_298__212
%"_val_248__212"<INT64,?> ⬅️ ::Cast(%"_val_247__212") {to=7}
1456 | # Constant_299__212
%"_val_249__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1457 | # Reshape_300__212
%"_val_250__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_248__212", %"_val_249__212") {allowzero=0}
1458 | # Constant_301__212
%"_val_251__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1459 | # Cast_302__212
%"_val_252__212"<INT64,?> ⬅️ ::Cast(%"_val_251__212") {to=7}
1460 | # Constant_303__212
%"_val_253__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1461 | # Reshape_304__212
%"_val_254__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_252__212", %"_val_253__212") {allowzero=0}
1462 | # Constant_305__212
%"_val_255__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1463 | # Cast_306__212
%"_val_256__212"<INT64,?> ⬅️ ::Cast(%"_val_255__212") {to=7}
1464 | # Constant_307__212
%"_val_257__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1465 | # Reshape_308__212
%"_val_258__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_256__212", %"_val_257__212") {allowzero=0}
1466 | # Constant_309__212
%"_val_259__212"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1467 | # Cast_310__212
%"_val_260__212"<INT64,?> ⬅️ ::Cast(%"_val_259__212") {to=7}
1468 | # Constant_311__212
%"_val_261__212"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1469 | # Reshape_312__212
%"_val_262__212"<INT64,[1]> ⬅️ ::Reshape(%"_val_260__212", %"_val_261__212") {allowzero=0}
1470 | # Slice_313__212
%"slice_42__212"<FLOAT16,?> ⬅️ ::Slice(%"slice_41__212", %"_val_250__212", %"_val_254__212", %"_val_258__212", %"_val_262__212")
1471 | # Constant_314__212
%"_val_264__212"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
1472 | # n0__252
%"size_0__252"<INT64,[5]> ⬅️ ::Cast(%"_val_264__212") {to=7}
1473 | # n1__252
%"size_1__252"<INT64,[5]> ⬅️ ::Abs(%"size_0__252")
1474 | # n2__252
%"expand_5__212"<FLOAT16,?> ⬅️ ::Expand(%"slice_42__212", %"size_1__252")
1475 | # n0__253
%"clone_5__212"<FLOAT16,?> ⬅️ ::Identity(%"expand_5__212")
1476 | # Constant_317__212
%"_val_267__212"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
1477 | # n0__254
%"size_0__254"<INT64,[4]> ⬅️ ::Cast(%"_val_267__212") {to=7}
1478 | # n1__254
%"view_51__212"<FLOAT16,[unk__367,unk__368,unk__369,unk__370]> ⬅️ ::Reshape(%"clone_5__212", %"size_0__254")
1479 | # n0__255
%"tmp__255"<INT64,[unk__371]> ⬅️ ::Shape(%"add_13__212")
1480 | # n1__255
%"int64_m1__255"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_m1')}
1481 | # n2__255
%"tmp_subscripted__255"<INT64,?> ⬅️ ::Gather(%"tmp__255", %"int64_m1__255") {axis=0}
1482 | # n3__255
%"embedding_size__255"<FLOAT16,?> ⬅️ ::CastLike(%"tmp_subscripted__255", %"add_13__212")
1483 | # n4__255
%"const__255"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
1484 | # n5__255
%"tmp_0__255"<FLOAT16,?> ⬅️ ::Sqrt(%"embedding_size__255")
1485 | # n6__255
%"const_cast__255"<FLOAT16,?> ⬅️ ::CastLike(%"const__255", %"tmp_0__255")
1486 | # n7__255
%"_val_269__212"<FLOAT16,?> ⬅️ ::Div(%"const_cast__255", %"tmp_0__255")
1487 | # CastLike_320__212
%"_val_270__212"<FLOAT16,?> ⬅️ ::CastLike(%"_val_269__212", %"add_13__212")
1488 | # n0__256
%"tmp__256"<INT64,[unk__372]> ⬅️ ::Shape(%"add_13__212")
1489 | # n1__256
%"int64_0_1d__256"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
1490 | # n2__256
%"int64_1_1d__256"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
1491 | # n3__256
%"int64_m2_1d__256"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
1492 | # n4__256
%"int64_m1_1d__256"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
1493 | # n5__256
%"target_length__256"<INT64,[unk__373]> ⬅️ ::Slice(%"tmp__256", %"int64_m2_1d__256", %"int64_m1_1d__256", %"int64_0_1d__256", %"int64_1_1d__256")
1494 | # n6__256
%"tmp_0__256"<INT64,[4]> ⬅️ ::Shape(%"view_50__212")
1495 | # n7__256
%"int64_0_1d_1__256"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
1496 | # n8__256
%"int64_1_1d_2__256"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
1497 | # n9__256
%"int64_m2_1d_3__256"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_3')}
1498 | # n10__256
%"int64_m1_1d_4__256"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_4')}
1499 | # n11__256
%"source_length__256"<INT64,[1]> ⬅️ ::Slice(%"tmp_0__256", %"int64_m2_1d_3__256", %"int64_m1_1d_4__256", %"int64_0_1d_1__256", %"int64_1_1d_2__256")
1500 | # n12__256
%"size__256"<INT64,[unk__374]> ⬅️ ::Concat(%"target_length__256", %"source_length__256") {axis=0}
1501 | # n13__256
%"const__256"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
1502 | # n14__256
%"attn_mask__256"<FLOAT,?> ⬅️ ::Expand(%"const__256", %"size__256")
1503 | # n15__256
%"attn_mask_5__256"<FLOAT,?> ⬅️ ::Trilu(%"attn_mask__256") {upper=0}
1504 | # n16__256
%"const_6__256"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_6')}
1505 | # n17__256
%"const_6_cast__256"<FLOAT,?> ⬅️ ::CastLike(%"const_6__256", %"attn_mask_5__256")
1506 | # n18__256
%"tmp_7__256"<BOOL,?> ⬅️ ::Equal(%"attn_mask_5__256", %"const_6_cast__256")
1507 | # n19__256
%"tmp_8__256"<FLOAT,?> ⬅️ ::Constant() {value_float=-inf}
1508 | # n20__256
%"const_9__256"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
1509 | # n21__256
%"const_9_cast__256"<FLOAT,?> ⬅️ ::CastLike(%"const_9__256", %"tmp_8__256")
1510 | # n22__256
%"attn_mask_10__256"<FLOAT,?> ⬅️ ::Where(%"tmp_7__256", %"tmp_8__256", %"const_9_cast__256")
1511 | # n23__256
%"_val_271__212"<FLOAT16,?> ⬅️ ::CastLike(%"attn_mask_10__256", %"add_13__212")
1512 | # n0__257
%"key_shape__257"<INT64,[4]> ⬅️ ::Shape(%"view_50__212")
1513 | # n1__257
%"int64_0_1d__257"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
1514 | # n2__257
%"int64_1_1d__257"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
1515 | # n3__257
%"int64_m1_1d__257"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
1516 | # n4__257
%"int64_9223372036854775807_1d__257"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_9223372036854775807_1d')}
1517 | # n5__257
%"key_last_dim__257"<INT64,[1]> ⬅️ ::Slice(%"key_shape__257", %"int64_m1_1d__257", %"int64_9223372036854775807_1d__257", %"int64_0_1d__257", %"int64_1_1d__257")
1518 | # n6__257
%"int64_0_1d_0__257"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_0')}
1519 | # n7__257
%"int64_1_1d_1__257"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_1')}
1520 | # n8__257
%"int64_m2_1d__257"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
1521 | # n9__257
%"int64_m1_1d_2__257"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_2')}
1522 | # n10__257
%"key_second_last_dim__257"<INT64,[1]> ⬅️ ::Slice(%"key_shape__257", %"int64_m2_1d__257", %"int64_m1_1d_2__257", %"int64_0_1d_0__257", %"int64_1_1d_1__257")
1523 | # n11__257
%"int64_0_1d_3__257"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
1524 | # n12__257
%"int64_1_1d_4__257"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
1525 | # n13__257
%"int64_m2_1d_5__257"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_5')}
1526 | # n14__257
%"key_first_dims__257"<INT64,[2]> ⬅️ ::Slice(%"key_shape__257", %"int64_0_1d_3__257", %"int64_m2_1d_5__257", %"int64_0_1d_3__257", %"int64_1_1d_4__257")
1527 | # n15__257
%"tmp__257"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1528 | # n16__257
%"key_squeezed_shape__257"<INT64,[3]> ⬅️ ::Concat(%"tmp__257", %"key_second_last_dim__257", %"key_last_dim__257") {axis=0}
1529 | # n17__257
%"key_squeezed__257"<FLOAT16,[unk__375,unk__376,unk__377]> ⬅️ ::Reshape(%"view_50__212", %"key_squeezed_shape__257")
1530 | # n18__257
%"key_squeezed_transposed__257"<FLOAT16,[unk__375,unk__377,unk__376]> ⬅️ ::Transpose(%"key_squeezed__257") {perm=[0, 2, 1]}
1531 | # n19__257
%"key_transposed_shape__257"<INT64,[4]> ⬅️ ::Concat(%"key_first_dims__257", %"key_last_dim__257", %"key_second_last_dim__257") {axis=0}
1532 | # n20__257
%"key_transposed__257"<FLOAT16,[unk__378,unk__379,unk__380,unk__381]> ⬅️ ::Reshape(%"key_squeezed_transposed__257", %"key_transposed_shape__257")
1533 | # n21__257
%"tmp_6__257"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__212")
1534 | # n22__257
%"query_scaled__257"<FLOAT16,?> ⬅️ ::Mul(%"add_13__212", %"tmp_6__257")
1535 | # n23__257
%"tmp_7__257"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__212")
1536 | # n24__257
%"key_transposed_scaled__257"<FLOAT16,[unk__378,unk__379,unk__380,unk__381]> ⬅️ ::Mul(%"key_transposed__257", %"tmp_7__257")
1537 | # n25__257
%"tmp_8__257"<FLOAT16,?> ⬅️ ::MatMul(%"query_scaled__257", %"key_transposed_scaled__257")
1538 | # n26__257
%"tmp_9__257"<FLOAT16,?> ⬅️ ::Add(%"tmp_8__257", %"_val_271__212")
1539 | # n27__257
%"attn_weight__257"<FLOAT16,?> ⬅️ ::Softmax(%"tmp_9__257") {axis=-1}
1540 | # n28__257
%"dropout_p__257"<FLOAT,?> ⬅️ ::Constant() {value_float=0.0}
1541 | # n29__257
%"attn_weight_10__257"<FLOAT16,?>, %"___257"<BOOL,?> ⬅️ ::Dropout(%"attn_weight__257", %"dropout_p__257")
1542 | # n30__257
%"_scaled_dot_product_efficient_attention_2__212"<FLOAT16,?> ⬅️ ::MatMul(%"attn_weight_10__257", %"view_51__212")
1543 | # n0__258
%"query_0__258"<FLOAT16,?> ⬅️ ::Transpose(%"add_13__212") {perm=[0, 2, 1, 3]}
1544 | # n1__258
%"query_shape__258"<INT64,[unk__382]> ⬅️ ::Shape(%"query_0__258")
1545 | # n2__258
%"int64_0_1d__258"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
1546 | # n3__258
%"int64_1_1d__258"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
1547 | # n4__258
%"query_first_dims__258"<INT64,[unk__383]> ⬅️ ::Slice(%"query_shape__258", %"int64_0_1d__258", %"int64_1_1d__258", %"int64_0_1d__258", %"int64_1_1d__258")
1548 | # n5__258
%"int64_0_1d_1__258"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
1549 | # n6__258
%"int64_1_1d_2__258"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
1550 | # n7__258
%"int64_2_1d__258"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_2_1d')}
1551 | # n8__258
%"query_second_dims__258"<INT64,[unk__384]> ⬅️ ::Slice(%"query_shape__258", %"int64_1_1d_2__258", %"int64_2_1d__258", %"int64_0_1d_1__258", %"int64_1_1d_2__258")
1552 | # n9__258
%"int64_0_1d_3__258"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
1553 | # n10__258
%"int64_1_1d_4__258"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
1554 | # n11__258
%"int64_m2_1d__258"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
1555 | # n12__258
%"int64_m1_1d__258"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
1556 | # n13__258
%"num_heads__258"<INT64,[unk__385]> ⬅️ ::Slice(%"query_shape__258", %"int64_m2_1d__258", %"int64_m1_1d__258", %"int64_0_1d_3__258", %"int64_1_1d_4__258")
1557 | # n14__258
%"compute_log_sumexp__258"<INT64,?> ⬅️ ::Constant() {value_int=0}
1558 | # n15__258
%"compute_log_sumexp_as_bool__258"<BOOL,?> ⬅️ ::Cast(%"compute_log_sumexp__258") {to=9}
1559 | # n16__258
%"_scaled_dot_product_efficient_attention_2_1__212"<FLOAT,?> ⬅️ ::If(%"compute_log_sumexp_as_bool__258") {then_branch=
graph(
name=thenGraph_14,
inputs=(
),
outputs=(
%"logsum_exp__258"<FLOAT,?>
),
) {
0 | # n0__258_98
%"tmp__258"<FLOAT,[unk__384]> ⬅️ ::Cast(%"query_second_dims__258") {to=1}
1 | # n1__258_99
%"const__258"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2 | # n2__258_100
%"const_cast__258"<FLOAT,?> ⬅️ ::CastLike(%"const__258", %"tmp__258")
3 | # n3__258_101
%"tmp_5__258"<FLOAT,[unk__384]> ⬅️ ::Div(%"tmp__258", %"const_cast__258")
4 | # n4__258_102
%"tmp_6__258"<FLOAT,[unk__384]> ⬅️ ::Ceil(%"tmp_5__258")
5 | # n5__258_103
%"const_7__258"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_7')}
6 | # n6__258_104
%"const_7_cast__258"<FLOAT,?> ⬅️ ::CastLike(%"const_7__258", %"tmp_6__258")
7 | # n7__258_105
%"tmp_8__258"<FLOAT,[unk__384]> ⬅️ ::Mul(%"tmp_6__258", %"const_7_cast__258")
8 | # n8__258_106
%"logsumexp_dim__258"<INT64,[unk__384]> ⬅️ ::Cast(%"tmp_8__258") {to=7}
9 | # n9__258_107
%"const_9__258"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
10 | # n10__258_108
%"tmp_10__258"<INT64,[unk__386]> ⬅️ ::Concat(%"query_first_dims__258", %"num_heads__258", %"logsumexp_dim__258") {axis=0}
11 | # n11__258_109
%"logsum_exp__258"<FLOAT,?> ⬅️ ::Expand(%"const_9__258", %"tmp_10__258")
return %"logsum_exp__258"<FLOAT,?>
}, else_branch=
graph(
name=elseGraph_14,
inputs=(
),
outputs=(
%"logsum_exp_14__258"<FLOAT,?>
),
) {
0 | # n0__258_110
%"const_11__258"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_11')}
1 | # n1__258_111
%"int64_0_1d_12__258"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_12')}
2 | # n2__258_112
%"int64_0_1d_12_cast__258"<INT64,[1]> ⬅️ ::CastLike(%"int64_0_1d_12__258", %"num_heads__258")
3 | # n3__258_113
%"tmp_13__258"<INT64,[unk__387]> ⬅️ ::Concat(%"query_first_dims__258", %"num_heads__258", %"int64_0_1d_12_cast__258") {axis=0}
4 | # n4__258_114
%"logsum_exp_14__258"<FLOAT,?> ⬅️ ::Expand(%"const_11__258", %"tmp_13__258")
return %"logsum_exp_14__258"<FLOAT,?>
}}
1560 | # n17__258
%"tmp_16__258"<INT64,[0]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[0]>('Empty_INTS')}
1561 | # n18__258
%"tmp_17__258"<FLOAT,?> ⬅️ ::ConstantOfShape(%"tmp_16__258")
1562 | # n19__258
%"_scaled_dot_product_efficient_attention_2_3__212"<INT64,?> ⬅️ ::Cast(%"tmp_17__258") {to=7}
1563 | # Transpose_324__212
%"transpose_11__212"<FLOAT16,?> ⬅️ ::Transpose(%"_scaled_dot_product_efficient_attention_2__212") {perm=[0, 2, 1, 3]}
1564 | # Constant_325__212
%"_val_276__212"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1565 | # n0__259
%"size_0__259"<INT64,[3]> ⬅️ ::Cast(%"_val_276__212") {to=7}
1566 | # n1__259
%"view_52__212"<FLOAT16,[unk__388,unk__389,unk__390]> ⬅️ ::Reshape(%"transpose_11__212", %"size_0__259")
1567 | # n0__262
%"tmp__262"<INT64,[2]> ⬅️ ::Shape(%"model.layers.2.self_attn.o_proj.weight")
1568 | # n1__262
%"rank__261"<INT64,?> ⬅️ ::Size(%"tmp__262")
1569 | # n1__261
%"int64_2__261"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1570 | # n2__261
%"int64_2_cast__261"<INT64,?> ⬅️ ::CastLike(%"int64_2__261", %"rank__261")
1571 | # n3__261
%"cond__261"<BOOL,?> ⬅️ ::Equal(%"rank__261", %"int64_2_cast__261")
1572 | # n4__261
%"t_17__260"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__261") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__261"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__261_115
%"result__261"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.2.self_attn.o_proj.weight") {perm=[1, 0]}
return %"result__261"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__261"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__261_116
%"result_0__261"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.2.self_attn.o_proj.weight")
return %"result_0__261"<FLOAT16,[4096,4096]>
}}
1573 | # Constant_3__260
%"_val_3__260"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1574 | # n0__263
%"size_0__263"<INT64,[2]> ⬅️ ::Cast(%"_val_3__260") {to=7}
1575 | # n1__263
%"view_53__260"<FLOAT16,[unk__391,unk__392]> ⬅️ ::Reshape(%"view_52__212", %"size_0__263")
1576 | # n0__264
%"mm_17__260"<FLOAT16,[unk__391,4096]> ⬅️ ::MatMul(%"view_53__260", %"t_17__260")
1577 | # Constant_6__260
%"_val_6__260"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1578 | # n0__265
%"size_0__265"<INT64,[3]> ⬅️ ::Cast(%"_val_6__260") {to=7}
1579 | # n1__265
%"model_layers_2_self_attn_1_2__202"<FLOAT16,[unk__393,unk__394,unk__395]> ⬅️ ::Reshape(%"mm_17__260", %"size_0__265")
1580 | # n0__266
%"alpha__266"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
1581 | # n1__266
%"alpha_0__266"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__266", %"model_layers_2_self_attn_1_2__202")
1582 | # n2__266
%"other_1__266"<FLOAT16,[unk__393,unk__394,unk__395]> ⬅️ ::Mul(%"model_layers_2_self_attn_1_2__202", %"alpha_0__266")
1583 | # n3__266
%"add_15__202"<FLOAT16,[unk__396,128,4096]> ⬅️ ::Add(%"model_layers_1_1_2__1", %"other_1__266")
1584 | # Cast_3__267
%"_to_copy_14__267"<FLOAT,[unk__396,128,4096]> ⬅️ ::Cast(%"add_15__202") {to=1}
1585 | # Constant_4__267
%"_val_2__267"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1586 | # Cast_5__267
%"scalar_tensor_default_5__267"<FLOAT,?> ⬅️ ::Cast(%"_val_2__267") {to=1}
1587 | # n0__268
%"pow_6__267"<FLOAT,[unk__396,128,4096]> ⬅️ ::Pow(%"_to_copy_14__267", %"scalar_tensor_default_5__267")
1588 | # Constant_7__267
%"_val_5__267"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
1589 | # n0__270
%"tmp__270"<INT64,[3]> ⬅️ ::Shape(%"pow_6__267")
1590 | # n1__270
%"tmp_0__270"<INT64,?> ⬅️ ::Size(%"tmp__270")
1591 | # n2__270
%"tmp_1__270"<INT64,?> ⬅️ ::Constant() {value_int=0}
1592 | # n3__270
%"cond__269"<BOOL,?> ⬅️ ::Equal(%"tmp_0__270", %"tmp_1__270")
1593 | # n1__269
%"mean_5__267"<FLOAT,?> ⬅️ ::If(%"cond__269") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__269"<FLOAT,[unk__396,128,4096]>
),
) {
0 | # n0__269_117
%"result__269"<FLOAT,[unk__396,128,4096]> ⬅️ ::Identity(%"pow_6__267")
return %"result__269"<FLOAT,[unk__396,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__269"<FLOAT,?>
),
) {
0 | # n0__271
%"tmp__271"<INT64,[1]> ⬅️ ::Shape(%"_val_5__267")
1 | # n1__271
%"tmp_0__271"<INT64,?> ⬅️ ::Size(%"tmp__271")
2 | # n2__271
%"tmp_1__271"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__271
%"cond_0__269"<BOOL,?> ⬅️ ::Equal(%"tmp_0__271", %"tmp_1__271")
4 | # n1__269_119
%"dim_3__269"<INT64,?> ⬅️ ::If(%"cond_0__269") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__269"<INT64,[1,1]>
),
) {
0 | # n0__269_120
%"int64_0__269"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__269_121
%"dim_1__269"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__267", %"int64_0__269")
return %"dim_1__269"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__269"<INT64,[1]>
),
) {
0 | # n0__269_122
%"dim_2__269"<INT64,[1]> ⬅️ ::Identity(%"_val_5__267")
return %"dim_2__269"<INT64,[1]>
}}
5 | # n2__269
%"result_4__269"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_6__267", %"dim_3__269") {keepdims=1}
return %"result_4__269"<FLOAT,?>
}}
1594 | # Constant_9__267
%"_val_7__267"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
1595 | # n0__272
%"alpha__272"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
1596 | # n1__272
%"alpha_0__272"<FLOAT,?> ⬅️ ::CastLike(%"alpha__272", %"_val_7__267")
1597 | # n2__272
%"other_1__272"<FLOAT,?> ⬅️ ::Mul(%"_val_7__267", %"alpha_0__272")
1598 | # n3__272
%"add_16__267"<FLOAT,?> ⬅️ ::Add(%"mean_5__267", %"other_1__272")
1599 | # n0__273
%"tmp__273"<FLOAT,?> ⬅️ ::Sqrt(%"add_16__267")
1600 | # n1__273
%"rsqrt_5__267"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__273")
1601 | # n0__274
%"mul_26__267"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_14__267", %"rsqrt_5__267")
1602 | # Cast_13__267
%"_to_copy_15__267"<FLOAT16,?> ⬅️ ::Cast(%"mul_26__267") {to=10}
1603 | # n0__275
%"model_layers_2_post_attention_layernorm_1__202"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.2.post_attention_layernorm.weight", %"_to_copy_15__267")
1604 | # n0__279
%"tmp__279"<INT64,[2]> ⬅️ ::Shape(%"model.layers.2.mlp.gate_proj.weight")
1605 | # n1__279
%"rank__278"<INT64,?> ⬅️ ::Size(%"tmp__279")
1606 | # n1__278
%"int64_2__278"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1607 | # n2__278
%"int64_2_cast__278"<INT64,?> ⬅️ ::CastLike(%"int64_2__278", %"rank__278")
1608 | # n3__278
%"cond__278"<BOOL,?> ⬅️ ::Equal(%"rank__278", %"int64_2_cast__278")
1609 | # n4__278
%"t_18__277"<FLOAT16,[unk__397,unk__398]> ⬅️ ::If(%"cond__278") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__278"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__278_123
%"result__278"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.2.mlp.gate_proj.weight") {perm=[1, 0]}
return %"result__278"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__278"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__278_124
%"result_0__278"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.2.mlp.gate_proj.weight")
return %"result_0__278"<FLOAT16,[14336,4096]>
}}
1610 | # Constant_3__277
%"_val_3__277"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1611 | # n0__280
%"size_0__280"<INT64,[2]> ⬅️ ::Cast(%"_val_3__277") {to=7}
1612 | # n1__280
%"view_55__277"<FLOAT16,[unk__399,unk__400]> ⬅️ ::Reshape(%"model_layers_2_post_attention_layernorm_1__202", %"size_0__280")
1613 | # n0__281
%"mm_18__277"<FLOAT16,[unk__399,unk__398]> ⬅️ ::MatMul(%"view_55__277", %"t_18__277")
1614 | # Constant_6__277
%"_val_6__277"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1615 | # n0__282
%"size_0__282"<INT64,[3]> ⬅️ ::Cast(%"_val_6__277") {to=7}
1616 | # n1__282
%"model_layers_2_mlp_gate_proj_1__276"<FLOAT16,[unk__401,unk__402,unk__403]> ⬅️ ::Reshape(%"mm_18__277", %"size_0__282")
1617 | # Cast_0__283
%"_to_copy_16__283"<FLOAT,[unk__401,unk__402,unk__403]> ⬅️ ::Cast(%"model_layers_2_mlp_gate_proj_1__276") {to=1}
1618 | # n0__284
%"sigmoid_2__283"<FLOAT,[unk__401,unk__402,unk__403]> ⬅️ ::Sigmoid(%"_to_copy_16__283")
1619 | # n0__285
%"mul_28__283"<FLOAT,[unk__401,unk__402,unk__403]> ⬅️ ::Mul(%"_to_copy_16__283", %"sigmoid_2__283")
1620 | # Cast_3__283
%"model_layers_2_mlp_act_fn_1__276"<FLOAT16,[unk__401,unk__402,unk__403]> ⬅️ ::Cast(%"mul_28__283") {to=10}
1621 | # n0__288
%"tmp__288"<INT64,[2]> ⬅️ ::Shape(%"model.layers.2.mlp.up_proj.weight")
1622 | # n1__288
%"rank__287"<INT64,?> ⬅️ ::Size(%"tmp__288")
1623 | # n1__287
%"int64_2__287"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1624 | # n2__287
%"int64_2_cast__287"<INT64,?> ⬅️ ::CastLike(%"int64_2__287", %"rank__287")
1625 | # n3__287
%"cond__287"<BOOL,?> ⬅️ ::Equal(%"rank__287", %"int64_2_cast__287")
1626 | # n4__287
%"t_19__286"<FLOAT16,[unk__404,unk__405]> ⬅️ ::If(%"cond__287") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__287"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__287_125
%"result__287"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.2.mlp.up_proj.weight") {perm=[1, 0]}
return %"result__287"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__287"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__287_126
%"result_0__287"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.2.mlp.up_proj.weight")
return %"result_0__287"<FLOAT16,[14336,4096]>
}}
1627 | # Constant_3__286
%"_val_3__286"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1628 | # n0__289
%"size_0__289"<INT64,[2]> ⬅️ ::Cast(%"_val_3__286") {to=7}
1629 | # n1__289
%"view_57__286"<FLOAT16,[unk__406,unk__407]> ⬅️ ::Reshape(%"model_layers_2_post_attention_layernorm_1__202", %"size_0__289")
1630 | # n0__290
%"mm_19__286"<FLOAT16,[unk__406,unk__405]> ⬅️ ::MatMul(%"view_57__286", %"t_19__286")
1631 | # Constant_6__286
%"_val_6__286"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1632 | # n0__291
%"size_0__291"<INT64,[3]> ⬅️ ::Cast(%"_val_6__286") {to=7}
1633 | # n1__291
%"model_layers_2_mlp_up_proj_1__276"<FLOAT16,[unk__408,unk__409,unk__410]> ⬅️ ::Reshape(%"mm_19__286", %"size_0__291")
1634 | # n0__292
%"mul_29__276"<FLOAT16,[unk__411,unk__412,unk__413]> ⬅️ ::Mul(%"model_layers_2_mlp_act_fn_1__276", %"model_layers_2_mlp_up_proj_1__276")
1635 | # n0__295
%"tmp__295"<INT64,[2]> ⬅️ ::Shape(%"model.layers.2.mlp.down_proj.weight")
1636 | # n1__295
%"rank__294"<INT64,?> ⬅️ ::Size(%"tmp__295")
1637 | # n1__294
%"int64_2__294"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1638 | # n2__294
%"int64_2_cast__294"<INT64,?> ⬅️ ::CastLike(%"int64_2__294", %"rank__294")
1639 | # n3__294
%"cond__294"<BOOL,?> ⬅️ ::Equal(%"rank__294", %"int64_2_cast__294")
1640 | # n4__294
%"t_20__293"<FLOAT16,[unk__414,unk__415]> ⬅️ ::If(%"cond__294") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__294"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__294_127
%"result__294"<FLOAT16,[14336,4096]> ⬅️ ::Transpose(%"model.layers.2.mlp.down_proj.weight") {perm=[1, 0]}
return %"result__294"<FLOAT16,[14336,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__294"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__294_128
%"result_0__294"<FLOAT16,[4096,14336]> ⬅️ ::Identity(%"model.layers.2.mlp.down_proj.weight")
return %"result_0__294"<FLOAT16,[4096,14336]>
}}
1641 | # Constant_3__293
%"_val_3__293"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1642 | # n0__296
%"size_0__296"<INT64,[2]> ⬅️ ::Cast(%"_val_3__293") {to=7}
1643 | # n1__296
%"view_59__293"<FLOAT16,[unk__416,unk__417]> ⬅️ ::Reshape(%"mul_29__276", %"size_0__296")
1644 | # n0__297
%"mm_20__293"<FLOAT16,[unk__416,unk__415]> ⬅️ ::MatMul(%"view_59__293", %"t_20__293")
1645 | # Constant_6__293
%"_val_6__293"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1646 | # n0__298
%"size_0__298"<INT64,[3]> ⬅️ ::Cast(%"_val_6__293") {to=7}
1647 | # n1__298
%"model_layers_2_mlp_1__202"<FLOAT16,[unk__418,unk__419,unk__420]> ⬅️ ::Reshape(%"mm_20__293", %"size_0__298")
1648 | # n0__299
%"alpha__299"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
1649 | # n1__299
%"alpha_0__299"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__299", %"model_layers_2_mlp_1__202")
1650 | # n2__299
%"other_1__299"<FLOAT16,[unk__418,unk__419,unk__420]> ⬅️ ::Mul(%"model_layers_2_mlp_1__202", %"alpha_0__299")
1651 | # n3__299
%"model_layers_2_1_2__1"<FLOAT16,[unk__421,128,4096]> ⬅️ ::Add(%"add_15__202", %"other_1__299")
1652 | # Cast_3__301
%"_to_copy_18__301"<FLOAT,[unk__421,128,4096]> ⬅️ ::Cast(%"model_layers_2_1_2__1") {to=1}
1653 | # Constant_4__301
%"_val_2__301"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1654 | # Cast_5__301
%"scalar_tensor_default_6__301"<FLOAT,?> ⬅️ ::Cast(%"_val_2__301") {to=1}
1655 | # n0__302
%"pow_7__301"<FLOAT,[unk__421,128,4096]> ⬅️ ::Pow(%"_to_copy_18__301", %"scalar_tensor_default_6__301")
1656 | # Constant_7__301
%"_val_5__301"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
1657 | # n0__304
%"tmp__304"<INT64,[3]> ⬅️ ::Shape(%"pow_7__301")
1658 | # n1__304
%"tmp_0__304"<INT64,?> ⬅️ ::Size(%"tmp__304")
1659 | # n2__304
%"tmp_1__304"<INT64,?> ⬅️ ::Constant() {value_int=0}
1660 | # n3__304
%"cond__303"<BOOL,?> ⬅️ ::Equal(%"tmp_0__304", %"tmp_1__304")
1661 | # n1__303
%"mean_6__301"<FLOAT,?> ⬅️ ::If(%"cond__303") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__303"<FLOAT,[unk__421,128,4096]>
),
) {
0 | # n0__303_129
%"result__303"<FLOAT,[unk__421,128,4096]> ⬅️ ::Identity(%"pow_7__301")
return %"result__303"<FLOAT,[unk__421,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__303"<FLOAT,?>
),
) {
0 | # n0__305
%"tmp__305"<INT64,[1]> ⬅️ ::Shape(%"_val_5__301")
1 | # n1__305
%"tmp_0__305"<INT64,?> ⬅️ ::Size(%"tmp__305")
2 | # n2__305
%"tmp_1__305"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__305
%"cond_0__303"<BOOL,?> ⬅️ ::Equal(%"tmp_0__305", %"tmp_1__305")
4 | # n1__303_131
%"dim_3__303"<INT64,?> ⬅️ ::If(%"cond_0__303") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__303"<INT64,[1,1]>
),
) {
0 | # n0__303_132
%"int64_0__303"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__303_133
%"dim_1__303"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__301", %"int64_0__303")
return %"dim_1__303"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__303"<INT64,[1]>
),
) {
0 | # n0__303_134
%"dim_2__303"<INT64,[1]> ⬅️ ::Identity(%"_val_5__301")
return %"dim_2__303"<INT64,[1]>
}}
5 | # n2__303
%"result_4__303"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_7__301", %"dim_3__303") {keepdims=1}
return %"result_4__303"<FLOAT,?>
}}
1662 | # Constant_9__301
%"_val_7__301"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
1663 | # n0__306
%"alpha__306"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
1664 | # n1__306
%"alpha_0__306"<FLOAT,?> ⬅️ ::CastLike(%"alpha__306", %"_val_7__301")
1665 | # n2__306
%"other_1__306"<FLOAT,?> ⬅️ ::Mul(%"_val_7__301", %"alpha_0__306")
1666 | # n3__306
%"add_18__301"<FLOAT,?> ⬅️ ::Add(%"mean_6__301", %"other_1__306")
1667 | # n0__307
%"tmp__307"<FLOAT,?> ⬅️ ::Sqrt(%"add_18__301")
1668 | # n1__307
%"rsqrt_6__301"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__307")
1669 | # n0__308
%"mul_30__301"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_18__301", %"rsqrt_6__301")
1670 | # Cast_13__301
%"_to_copy_19__301"<FLOAT16,?> ⬅️ ::Cast(%"mul_30__301") {to=10}
1671 | # n0__309
%"model_layers_3_input_layernorm_1__300"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.3.input_layernorm.weight", %"_to_copy_19__301")
1672 | # n0__313
%"tmp__313"<INT64,[2]> ⬅️ ::Shape(%"model.layers.3.self_attn.q_proj.weight")
1673 | # n1__313
%"rank__312"<INT64,?> ⬅️ ::Size(%"tmp__313")
1674 | # n1__312
%"int64_2__312"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1675 | # n2__312
%"int64_2_cast__312"<INT64,?> ⬅️ ::CastLike(%"int64_2__312", %"rank__312")
1676 | # n3__312
%"cond__312"<BOOL,?> ⬅️ ::Equal(%"rank__312", %"int64_2_cast__312")
1677 | # n4__312
%"t_21__311"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__312") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__312"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__312_135
%"result__312"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.3.self_attn.q_proj.weight") {perm=[1, 0]}
return %"result__312"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__312"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__312_136
%"result_0__312"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.3.self_attn.q_proj.weight")
return %"result_0__312"<FLOAT16,[4096,4096]>
}}
1678 | # Constant_3__311
%"_val_3__311"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1679 | # n0__314
%"size_0__314"<INT64,[2]> ⬅️ ::Cast(%"_val_3__311") {to=7}
1680 | # n1__314
%"view_61__311"<FLOAT16,[unk__422,unk__423]> ⬅️ ::Reshape(%"model_layers_3_input_layernorm_1__300", %"size_0__314")
1681 | # n0__315
%"mm_21__311"<FLOAT16,[unk__422,4096]> ⬅️ ::MatMul(%"view_61__311", %"t_21__311")
1682 | # Constant_6__311
%"_val_6__311"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1683 | # n0__316
%"size_0__316"<INT64,[3]> ⬅️ ::Cast(%"_val_6__311") {to=7}
1684 | # n1__316
%"model_layers_3_self_attn_q_proj_1__310"<FLOAT16,[unk__424,unk__425,unk__426]> ⬅️ ::Reshape(%"mm_21__311", %"size_0__316")
1685 | # n0__319
%"tmp__319"<INT64,[2]> ⬅️ ::Shape(%"model.layers.3.self_attn.k_proj.weight")
1686 | # n1__319
%"rank__318"<INT64,?> ⬅️ ::Size(%"tmp__319")
1687 | # n1__318
%"int64_2__318"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1688 | # n2__318
%"int64_2_cast__318"<INT64,?> ⬅️ ::CastLike(%"int64_2__318", %"rank__318")
1689 | # n3__318
%"cond__318"<BOOL,?> ⬅️ ::Equal(%"rank__318", %"int64_2_cast__318")
1690 | # n4__318
%"t_22__317"<FLOAT16,[unk__427,unk__428]> ⬅️ ::If(%"cond__318") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__318"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__318_137
%"result__318"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.3.self_attn.k_proj.weight") {perm=[1, 0]}
return %"result__318"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__318"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__318_138
%"result_0__318"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.3.self_attn.k_proj.weight")
return %"result_0__318"<FLOAT16,[1024,4096]>
}}
1691 | # Constant_3__317
%"_val_3__317"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1692 | # n0__320
%"size_0__320"<INT64,[2]> ⬅️ ::Cast(%"_val_3__317") {to=7}
1693 | # n1__320
%"view_63__317"<FLOAT16,[unk__429,unk__430]> ⬅️ ::Reshape(%"model_layers_3_input_layernorm_1__300", %"size_0__320")
1694 | # n0__321
%"mm_22__317"<FLOAT16,[unk__429,unk__428]> ⬅️ ::MatMul(%"view_63__317", %"t_22__317")
1695 | # Constant_6__317
%"_val_6__317"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1696 | # n0__322
%"size_0__322"<INT64,[3]> ⬅️ ::Cast(%"_val_6__317") {to=7}
1697 | # n1__322
%"model_layers_3_self_attn_k_proj_1__310"<FLOAT16,[unk__431,unk__432,unk__433]> ⬅️ ::Reshape(%"mm_22__317", %"size_0__322")
1698 | # n0__325
%"tmp__325"<INT64,[2]> ⬅️ ::Shape(%"model.layers.3.self_attn.v_proj.weight")
1699 | # n1__325
%"rank__324"<INT64,?> ⬅️ ::Size(%"tmp__325")
1700 | # n1__324
%"int64_2__324"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
1701 | # n2__324
%"int64_2_cast__324"<INT64,?> ⬅️ ::CastLike(%"int64_2__324", %"rank__324")
1702 | # n3__324
%"cond__324"<BOOL,?> ⬅️ ::Equal(%"rank__324", %"int64_2_cast__324")
1703 | # n4__324
%"t_23__323"<FLOAT16,[unk__434,unk__435]> ⬅️ ::If(%"cond__324") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__324"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__324_139
%"result__324"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.3.self_attn.v_proj.weight") {perm=[1, 0]}
return %"result__324"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__324"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__324_140
%"result_0__324"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.3.self_attn.v_proj.weight")
return %"result_0__324"<FLOAT16,[1024,4096]>
}}
1704 | # Constant_3__323
%"_val_3__323"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
1705 | # n0__326
%"size_0__326"<INT64,[2]> ⬅️ ::Cast(%"_val_3__323") {to=7}
1706 | # n1__326
%"view_65__323"<FLOAT16,[unk__436,unk__437]> ⬅️ ::Reshape(%"model_layers_3_input_layernorm_1__300", %"size_0__326")
1707 | # n0__327
%"mm_23__323"<FLOAT16,[unk__436,unk__435]> ⬅️ ::MatMul(%"view_65__323", %"t_23__323")
1708 | # Constant_6__323
%"_val_6__323"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
1709 | # n0__328
%"size_0__328"<INT64,[3]> ⬅️ ::Cast(%"_val_6__323") {to=7}
1710 | # n1__328
%"model_layers_3_self_attn_v_proj_1__310"<FLOAT16,[unk__438,unk__439,unk__440]> ⬅️ ::Reshape(%"mm_23__323", %"size_0__328")
1711 | # Constant_61__310
%"_val_8__310"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
1712 | # n0__329
%"size_0__329"<INT64,[4]> ⬅️ ::Cast(%"_val_8__310") {to=7}
1713 | # n1__329
%"view_67__310"<FLOAT16,[unk__441,unk__442,unk__443,unk__444]> ⬅️ ::Reshape(%"model_layers_3_self_attn_q_proj_1__310", %"size_0__329")
1714 | # Transpose_63__310
%"transpose_12__310"<FLOAT16,[unk__441,unk__443,unk__442,unk__444]> ⬅️ ::Transpose(%"view_67__310") {perm=[0, 2, 1, 3]}
1715 | # Constant_64__310
%"_val_11__310"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
1716 | # n0__330
%"size_0__330"<INT64,[4]> ⬅️ ::Cast(%"_val_11__310") {to=7}
1717 | # n1__330
%"view_68__310"<FLOAT16,[unk__445,unk__446,unk__447,unk__448]> ⬅️ ::Reshape(%"model_layers_3_self_attn_k_proj_1__310", %"size_0__330")
1718 | # Transpose_66__310
%"transpose_13__310"<FLOAT16,[unk__445,unk__447,unk__446,unk__448]> ⬅️ ::Transpose(%"view_68__310") {perm=[0, 2, 1, 3]}
1719 | # Constant_67__310
%"_val_14__310"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
1720 | # n0__331
%"size_0__331"<INT64,[4]> ⬅️ ::Cast(%"_val_14__310") {to=7}
1721 | # n1__331
%"view_69__310"<FLOAT16,[unk__449,unk__450,unk__451,unk__452]> ⬅️ ::Reshape(%"model_layers_3_self_attn_v_proj_1__310", %"size_0__331")
1722 | # Transpose_69__310
%"model_1_6"<FLOAT16,[1,8,128,128]> ⬅️ ::Transpose(%"view_69__310") {perm=[0, 2, 1, 3]}
1723 | # Constant_8__332
%"_val_1__332"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1724 | # Cast_9__332
%"_val_2__332"<INT64,?> ⬅️ ::Cast(%"_val_1__332") {to=7}
1725 | # Constant_10__332
%"_val_3__332"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1726 | # Reshape_11__332
%"_val_4__332"<INT64,[1]> ⬅️ ::Reshape(%"_val_2__332", %"_val_3__332") {allowzero=0}
1727 | # Constant_12__332
%"_val_5__332"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1728 | # Cast_13__332
%"_val_6__332"<INT64,?> ⬅️ ::Cast(%"_val_5__332") {to=7}
1729 | # Constant_14__332
%"_val_7__332"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1730 | # Reshape_15__332
%"_val_8__332"<INT64,[1]> ⬅️ ::Reshape(%"_val_6__332", %"_val_7__332") {allowzero=0}
1731 | # Constant_16__332
%"_val_9__332"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1732 | # Cast_17__332
%"_val_10__332"<INT64,?> ⬅️ ::Cast(%"_val_9__332") {to=7}
1733 | # Constant_18__332
%"_val_11__332"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1734 | # Reshape_19__332
%"_val_12__332"<INT64,[1]> ⬅️ ::Reshape(%"_val_10__332", %"_val_11__332") {allowzero=0}
1735 | # Constant_20__332
%"_val_13__332"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1736 | # Cast_21__332
%"_val_14__332"<INT64,?> ⬅️ ::Cast(%"_val_13__332") {to=7}
1737 | # Constant_22__332
%"_val_15__332"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1738 | # Reshape_23__332
%"_val_16__332"<INT64,[1]> ⬅️ ::Reshape(%"_val_14__332", %"_val_15__332") {allowzero=0}
1739 | # Slice_24__332
%"model_layers_3_self_attn_rotary_emb_1__310"<FLOAT16,[unk__453,unk__454]> ⬅️ ::Slice(%"model.layers.3.self_attn.rotary_emb.cos_cached", %"_val_4__332", %"_val_8__332", %"_val_12__332", %"_val_16__332")
1740 | # Constant_25__332
%"_val_19__332"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1741 | # Cast_26__332
%"_val_20__332"<INT64,?> ⬅️ ::Cast(%"_val_19__332") {to=7}
1742 | # Constant_27__332
%"_val_21__332"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1743 | # Reshape_28__332
%"_val_22__332"<INT64,[1]> ⬅️ ::Reshape(%"_val_20__332", %"_val_21__332") {allowzero=0}
1744 | # Constant_29__332
%"_val_23__332"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1745 | # Cast_30__332
%"_val_24__332"<INT64,?> ⬅️ ::Cast(%"_val_23__332") {to=7}
1746 | # Constant_31__332
%"_val_25__332"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1747 | # Reshape_32__332
%"_val_26__332"<INT64,[1]> ⬅️ ::Reshape(%"_val_24__332", %"_val_25__332") {allowzero=0}
1748 | # Constant_33__332
%"_val_27__332"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1749 | # Cast_34__332
%"_val_28__332"<INT64,?> ⬅️ ::Cast(%"_val_27__332") {to=7}
1750 | # Constant_35__332
%"_val_29__332"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1751 | # Reshape_36__332
%"_val_30__332"<INT64,[1]> ⬅️ ::Reshape(%"_val_28__332", %"_val_29__332") {allowzero=0}
1752 | # Constant_37__332
%"_val_31__332"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1753 | # Cast_38__332
%"_val_32__332"<INT64,?> ⬅️ ::Cast(%"_val_31__332") {to=7}
1754 | # Constant_39__332
%"_val_33__332"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1755 | # Reshape_40__332
%"_val_34__332"<INT64,[1]> ⬅️ ::Reshape(%"_val_32__332", %"_val_33__332") {allowzero=0}
1756 | # Slice_41__332
%"model_layers_3_self_attn_rotary_emb_1_1__310"<FLOAT16,[unk__455,unk__456]> ⬅️ ::Slice(%"model.layers.3.self_attn.rotary_emb.sin_cached", %"_val_22__332", %"_val_26__332", %"_val_30__332", %"_val_34__332")
1757 | # Transpose_71__310
%"_val_21__310"<FLOAT16,[unk__453,unk__454]> ⬅️ ::Transpose(%"model_layers_3_self_attn_rotary_emb_1__310") {perm=[0, 1]}
1758 | # Max_72__310
%"_val_22__310"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
1759 | # Shape_73__310
%"_val_23__310"<INT64,[2]> ⬅️ ::Shape(%"_val_22__310") {start=0}
1760 | # Expand_74__310
%"_val_24__310"<INT64,[unk__457,unk__458]> ⬅️ ::Expand(%"view__1", %"_val_23__310")
1761 | # Constant_75__310
%"_val_25__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1762 | # Unsqueeze_76__310
%"_val_26__310"<INT64,[unk__457,unk__458,1]> ⬅️ ::Unsqueeze(%"_val_24__310", %"_val_25__310")
1763 | # Concat_77__310
%"_val_27__310"<INT64,[unk__457,unk__458,1]> ⬅️ ::Concat(%"_val_26__310") {axis=-1}
1764 | # GatherND_78__310
%"_val_28__310"<FLOAT16,[unk__457,unk__458,unk__454]> ⬅️ ::GatherND(%"_val_21__310", %"_val_27__310") {batch_dims=0}
1765 | # Transpose_79__310
%"index_6__310"<FLOAT16,[unk__457,unk__458,unk__454]> ⬅️ ::Transpose(%"_val_28__310") {perm=[0, 1, 2]}
1766 | # n0__333
%"dim__333"<INT64,?> ⬅️ ::Constant() {value_int=1}
1767 | # n1__333
%"dim_0__333"<INT64,?> ⬅️ ::Cast(%"dim__333") {to=7}
1768 | # n2__333
%"unsqueeze_13__310"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_6__310", %"dim_0__333")
1769 | # Transpose_81__310
%"_val_31__310"<FLOAT16,[unk__455,unk__456]> ⬅️ ::Transpose(%"model_layers_3_self_attn_rotary_emb_1_1__310") {perm=[0, 1]}
1770 | # Max_82__310
%"_val_32__310"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
1771 | # Shape_83__310
%"_val_33__310"<INT64,[2]> ⬅️ ::Shape(%"_val_32__310") {start=0}
1772 | # Expand_84__310
%"_val_34__310"<INT64,[unk__459,unk__460]> ⬅️ ::Expand(%"view__1", %"_val_33__310")
1773 | # Constant_85__310
%"_val_35__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1774 | # Unsqueeze_86__310
%"_val_36__310"<INT64,[unk__459,unk__460,1]> ⬅️ ::Unsqueeze(%"_val_34__310", %"_val_35__310")
1775 | # Concat_87__310
%"_val_37__310"<INT64,[unk__459,unk__460,1]> ⬅️ ::Concat(%"_val_36__310") {axis=-1}
1776 | # GatherND_88__310
%"_val_38__310"<FLOAT16,[unk__459,unk__460,unk__456]> ⬅️ ::GatherND(%"_val_31__310", %"_val_37__310") {batch_dims=0}
1777 | # Transpose_89__310
%"index_7__310"<FLOAT16,[unk__459,unk__460,unk__456]> ⬅️ ::Transpose(%"_val_38__310") {perm=[0, 1, 2]}
1778 | # n0__334
%"dim__334"<INT64,?> ⬅️ ::Constant() {value_int=1}
1779 | # n1__334
%"dim_0__334"<INT64,?> ⬅️ ::Cast(%"dim__334") {to=7}
1780 | # n2__334
%"unsqueeze_14__310"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_7__310", %"dim_0__334")
1781 | # n0__335
%"mul_32__310"<FLOAT16,?> ⬅️ ::Mul(%"transpose_12__310", %"unsqueeze_13__310")
1782 | # Constant_92__310
%"_val_42__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1783 | # Cast_93__310
%"_val_43__310"<INT64,?> ⬅️ ::Cast(%"_val_42__310") {to=7}
1784 | # Constant_94__310
%"_val_44__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1785 | # Reshape_95__310
%"_val_45__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_43__310", %"_val_44__310") {allowzero=0}
1786 | # Constant_96__310
%"_val_46__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1787 | # Cast_97__310
%"_val_47__310"<INT64,?> ⬅️ ::Cast(%"_val_46__310") {to=7}
1788 | # Constant_98__310
%"_val_48__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1789 | # Reshape_99__310
%"_val_49__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_47__310", %"_val_48__310") {allowzero=0}
1790 | # Constant_100__310
%"_val_50__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1791 | # Cast_101__310
%"_val_51__310"<INT64,?> ⬅️ ::Cast(%"_val_50__310") {to=7}
1792 | # Constant_102__310
%"_val_52__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1793 | # Reshape_103__310
%"_val_53__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_51__310", %"_val_52__310") {allowzero=0}
1794 | # Constant_104__310
%"_val_54__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1795 | # Cast_105__310
%"_val_55__310"<INT64,?> ⬅️ ::Cast(%"_val_54__310") {to=7}
1796 | # Constant_106__310
%"_val_56__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1797 | # Reshape_107__310
%"_val_57__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_55__310", %"_val_56__310") {allowzero=0}
1798 | # Slice_108__310
%"slice_45__310"<FLOAT16,[unk__461,unk__462,unk__463,unk__464]> ⬅️ ::Slice(%"transpose_12__310", %"_val_45__310", %"_val_49__310", %"_val_53__310", %"_val_57__310")
1799 | # Constant_109__310
%"_val_59__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1800 | # Cast_110__310
%"_val_60__310"<INT64,?> ⬅️ ::Cast(%"_val_59__310") {to=7}
1801 | # Constant_111__310
%"_val_61__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1802 | # Reshape_112__310
%"_val_62__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_60__310", %"_val_61__310") {allowzero=0}
1803 | # Constant_113__310
%"_val_63__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1804 | # Cast_114__310
%"_val_64__310"<INT64,?> ⬅️ ::Cast(%"_val_63__310") {to=7}
1805 | # Constant_115__310
%"_val_65__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1806 | # Reshape_116__310
%"_val_66__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_64__310", %"_val_65__310") {allowzero=0}
1807 | # Constant_117__310
%"_val_67__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1808 | # Cast_118__310
%"_val_68__310"<INT64,?> ⬅️ ::Cast(%"_val_67__310") {to=7}
1809 | # Constant_119__310
%"_val_69__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1810 | # Reshape_120__310
%"_val_70__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_68__310", %"_val_69__310") {allowzero=0}
1811 | # Constant_121__310
%"_val_71__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1812 | # Cast_122__310
%"_val_72__310"<INT64,?> ⬅️ ::Cast(%"_val_71__310") {to=7}
1813 | # Constant_123__310
%"_val_73__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1814 | # Reshape_124__310
%"_val_74__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_72__310", %"_val_73__310") {allowzero=0}
1815 | # Slice_125__310
%"slice_46__310"<FLOAT16,[unk__465,unk__466,unk__467,unk__468]> ⬅️ ::Slice(%"transpose_12__310", %"_val_62__310", %"_val_66__310", %"_val_70__310", %"_val_74__310")
1816 | # n0__336
%"neg_6__310"<FLOAT16,[unk__465,unk__466,unk__467,unk__468]> ⬅️ ::Neg(%"slice_46__310")
1817 | # SequenceConstruct_127__310
%"77__310"<Sequence(Tensor(FLOAT16)),[unk__469,unk__470,unk__471,unk__472]> ⬅️ ::SequenceConstruct(%"neg_6__310", %"slice_45__310")
1818 | # n0__337
%"cat_6__310"<FLOAT16,[unk__469,unk__470,unk__471,unk__473]> ⬅️ ::ConcatFromSequence(%"77__310") {axis=-1}
1819 | # n0__338
%"mul_33__310"<FLOAT16,?> ⬅️ ::Mul(%"cat_6__310", %"unsqueeze_14__310")
1820 | # n0__339
%"alpha__339"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
1821 | # n1__339
%"alpha_0__339"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__339", %"mul_33__310")
1822 | # n2__339
%"other_1__339"<FLOAT16,?> ⬅️ ::Mul(%"mul_33__310", %"alpha_0__339")
1823 | # n3__339
%"add_19__310"<FLOAT16,?> ⬅️ ::Add(%"mul_32__310", %"other_1__339")
1824 | # n0__340
%"mul_34__310"<FLOAT16,?> ⬅️ ::Mul(%"transpose_13__310", %"unsqueeze_13__310")
1825 | # Constant_132__310
%"_val_82__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1826 | # Cast_133__310
%"_val_83__310"<INT64,?> ⬅️ ::Cast(%"_val_82__310") {to=7}
1827 | # Constant_134__310
%"_val_84__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1828 | # Reshape_135__310
%"_val_85__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_83__310", %"_val_84__310") {allowzero=0}
1829 | # Constant_136__310
%"_val_86__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1830 | # Cast_137__310
%"_val_87__310"<INT64,?> ⬅️ ::Cast(%"_val_86__310") {to=7}
1831 | # Constant_138__310
%"_val_88__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1832 | # Reshape_139__310
%"_val_89__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_87__310", %"_val_88__310") {allowzero=0}
1833 | # Constant_140__310
%"_val_90__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1834 | # Cast_141__310
%"_val_91__310"<INT64,?> ⬅️ ::Cast(%"_val_90__310") {to=7}
1835 | # Constant_142__310
%"_val_92__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1836 | # Reshape_143__310
%"_val_93__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_91__310", %"_val_92__310") {allowzero=0}
1837 | # Constant_144__310
%"_val_94__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1838 | # Cast_145__310
%"_val_95__310"<INT64,?> ⬅️ ::Cast(%"_val_94__310") {to=7}
1839 | # Constant_146__310
%"_val_96__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1840 | # Reshape_147__310
%"_val_97__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_95__310", %"_val_96__310") {allowzero=0}
1841 | # Slice_148__310
%"slice_47__310"<FLOAT16,[unk__474,unk__475,unk__476,unk__477]> ⬅️ ::Slice(%"transpose_13__310", %"_val_85__310", %"_val_89__310", %"_val_93__310", %"_val_97__310")
1842 | # Constant_149__310
%"_val_99__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1843 | # Cast_150__310
%"_val_100__310"<INT64,?> ⬅️ ::Cast(%"_val_99__310") {to=7}
1844 | # Constant_151__310
%"_val_101__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1845 | # Reshape_152__310
%"_val_102__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_100__310", %"_val_101__310") {allowzero=0}
1846 | # Constant_153__310
%"_val_103__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1847 | # Cast_154__310
%"_val_104__310"<INT64,?> ⬅️ ::Cast(%"_val_103__310") {to=7}
1848 | # Constant_155__310
%"_val_105__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1849 | # Reshape_156__310
%"_val_106__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_104__310", %"_val_105__310") {allowzero=0}
1850 | # Constant_157__310
%"_val_107__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1851 | # Cast_158__310
%"_val_108__310"<INT64,?> ⬅️ ::Cast(%"_val_107__310") {to=7}
1852 | # Constant_159__310
%"_val_109__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1853 | # Reshape_160__310
%"_val_110__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_108__310", %"_val_109__310") {allowzero=0}
1854 | # Constant_161__310
%"_val_111__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1855 | # Cast_162__310
%"_val_112__310"<INT64,?> ⬅️ ::Cast(%"_val_111__310") {to=7}
1856 | # Constant_163__310
%"_val_113__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1857 | # Reshape_164__310
%"_val_114__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_112__310", %"_val_113__310") {allowzero=0}
1858 | # Slice_165__310
%"slice_48__310"<FLOAT16,[unk__478,unk__479,unk__480,unk__481]> ⬅️ ::Slice(%"transpose_13__310", %"_val_102__310", %"_val_106__310", %"_val_110__310", %"_val_114__310")
1859 | # n0__341
%"neg_7__310"<FLOAT16,[unk__478,unk__479,unk__480,unk__481]> ⬅️ ::Neg(%"slice_48__310")
1860 | # SequenceConstruct_167__310
%"117__310"<Sequence(Tensor(FLOAT16)),[unk__482,unk__483,unk__484,unk__485]> ⬅️ ::SequenceConstruct(%"neg_7__310", %"slice_47__310")
1861 | # n0__342
%"cat_7__310"<FLOAT16,[unk__482,unk__483,unk__484,unk__486]> ⬅️ ::ConcatFromSequence(%"117__310") {axis=-1}
1862 | # n0__343
%"mul_35__310"<FLOAT16,?> ⬅️ ::Mul(%"cat_7__310", %"unsqueeze_14__310")
1863 | # n0__344
%"alpha__344"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
1864 | # n1__344
%"alpha_0__344"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__344", %"mul_35__310")
1865 | # n2__344
%"other_1__344"<FLOAT16,?> ⬅️ ::Mul(%"mul_35__310", %"alpha_0__344")
1866 | # n3__344
%"model_1_7"<FLOAT16,[1,8,128,128]> ⬅️ ::Add(%"mul_34__310", %"other_1__344")
1867 | # Constant_171__310
%"_val_121__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1868 | # Cast_172__310
%"_val_122__310"<INT64,?> ⬅️ ::Cast(%"_val_121__310") {to=7}
1869 | # Constant_173__310
%"_val_123__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1870 | # Reshape_174__310
%"_val_124__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_122__310", %"_val_123__310") {allowzero=0}
1871 | # Constant_175__310
%"_val_125__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1872 | # Cast_176__310
%"_val_126__310"<INT64,?> ⬅️ ::Cast(%"_val_125__310") {to=7}
1873 | # Constant_177__310
%"_val_127__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1874 | # Reshape_178__310
%"_val_128__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_126__310", %"_val_127__310") {allowzero=0}
1875 | # Constant_179__310
%"_val_129__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1876 | # Cast_180__310
%"_val_130__310"<INT64,?> ⬅️ ::Cast(%"_val_129__310") {to=7}
1877 | # Constant_181__310
%"_val_131__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1878 | # Reshape_182__310
%"_val_132__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_130__310", %"_val_131__310") {allowzero=0}
1879 | # Constant_183__310
%"_val_133__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1880 | # Cast_184__310
%"_val_134__310"<INT64,?> ⬅️ ::Cast(%"_val_133__310") {to=7}
1881 | # Constant_185__310
%"_val_135__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1882 | # Reshape_186__310
%"_val_136__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_134__310", %"_val_135__310") {allowzero=0}
1883 | # Slice_187__310
%"slice_49__310"<FLOAT16,[unk__487,unk__488,unk__489,unk__490]> ⬅️ ::Slice(%"model_1_7", %"_val_124__310", %"_val_128__310", %"_val_132__310", %"_val_136__310")
1884 | # Constant_188__310
%"_val_138__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1885 | # Cast_189__310
%"_val_139__310"<INT64,?> ⬅️ ::Cast(%"_val_138__310") {to=7}
1886 | # Constant_190__310
%"_val_140__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1887 | # Reshape_191__310
%"_val_141__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_139__310", %"_val_140__310") {allowzero=0}
1888 | # Constant_192__310
%"_val_142__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1889 | # Cast_193__310
%"_val_143__310"<INT64,?> ⬅️ ::Cast(%"_val_142__310") {to=7}
1890 | # Constant_194__310
%"_val_144__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1891 | # Reshape_195__310
%"_val_145__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_143__310", %"_val_144__310") {allowzero=0}
1892 | # Constant_196__310
%"_val_146__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1893 | # Cast_197__310
%"_val_147__310"<INT64,?> ⬅️ ::Cast(%"_val_146__310") {to=7}
1894 | # Constant_198__310
%"_val_148__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1895 | # Reshape_199__310
%"_val_149__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_147__310", %"_val_148__310") {allowzero=0}
1896 | # Constant_200__310
%"_val_150__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1897 | # Cast_201__310
%"_val_151__310"<INT64,?> ⬅️ ::Cast(%"_val_150__310") {to=7}
1898 | # Constant_202__310
%"_val_152__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1899 | # Reshape_203__310
%"_val_153__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_151__310", %"_val_152__310") {allowzero=0}
1900 | # Slice_204__310
%"slice_50__310"<FLOAT16,[unk__491,unk__492,unk__493,unk__494]> ⬅️ ::Slice(%"slice_49__310", %"_val_141__310", %"_val_145__310", %"_val_149__310", %"_val_153__310")
1901 | # n0__345
%"dim__345"<INT64,?> ⬅️ ::Constant() {value_int=2}
1902 | # n1__345
%"dim_0__345"<INT64,?> ⬅️ ::Cast(%"dim__345") {to=7}
1903 | # n2__345
%"unsqueeze_15__310"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_50__310", %"dim_0__345")
1904 | # Constant_206__310
%"_val_156__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1905 | # Cast_207__310
%"_val_157__310"<INT64,?> ⬅️ ::Cast(%"_val_156__310") {to=7}
1906 | # Constant_208__310
%"_val_158__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1907 | # Reshape_209__310
%"_val_159__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_157__310", %"_val_158__310") {allowzero=0}
1908 | # Constant_210__310
%"_val_160__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1909 | # Cast_211__310
%"_val_161__310"<INT64,?> ⬅️ ::Cast(%"_val_160__310") {to=7}
1910 | # Constant_212__310
%"_val_162__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1911 | # Reshape_213__310
%"_val_163__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_161__310", %"_val_162__310") {allowzero=0}
1912 | # Constant_214__310
%"_val_164__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1913 | # Cast_215__310
%"_val_165__310"<INT64,?> ⬅️ ::Cast(%"_val_164__310") {to=7}
1914 | # Constant_216__310
%"_val_166__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1915 | # Reshape_217__310
%"_val_167__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_165__310", %"_val_166__310") {allowzero=0}
1916 | # Constant_218__310
%"_val_168__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1917 | # Cast_219__310
%"_val_169__310"<INT64,?> ⬅️ ::Cast(%"_val_168__310") {to=7}
1918 | # Constant_220__310
%"_val_170__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1919 | # Reshape_221__310
%"_val_171__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_169__310", %"_val_170__310") {allowzero=0}
1920 | # Slice_222__310
%"slice_51__310"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_15__310", %"_val_159__310", %"_val_163__310", %"_val_167__310", %"_val_171__310")
1921 | # Constant_223__310
%"_val_173__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1922 | # Cast_224__310
%"_val_174__310"<INT64,?> ⬅️ ::Cast(%"_val_173__310") {to=7}
1923 | # Constant_225__310
%"_val_175__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1924 | # Reshape_226__310
%"_val_176__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_174__310", %"_val_175__310") {allowzero=0}
1925 | # Constant_227__310
%"_val_177__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1926 | # Cast_228__310
%"_val_178__310"<INT64,?> ⬅️ ::Cast(%"_val_177__310") {to=7}
1927 | # Constant_229__310
%"_val_179__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1928 | # Reshape_230__310
%"_val_180__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_178__310", %"_val_179__310") {allowzero=0}
1929 | # Constant_231__310
%"_val_181__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1930 | # Cast_232__310
%"_val_182__310"<INT64,?> ⬅️ ::Cast(%"_val_181__310") {to=7}
1931 | # Constant_233__310
%"_val_183__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1932 | # Reshape_234__310
%"_val_184__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_182__310", %"_val_183__310") {allowzero=0}
1933 | # Constant_235__310
%"_val_185__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1934 | # Cast_236__310
%"_val_186__310"<INT64,?> ⬅️ ::Cast(%"_val_185__310") {to=7}
1935 | # Constant_237__310
%"_val_187__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1936 | # Reshape_238__310
%"_val_188__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_186__310", %"_val_187__310") {allowzero=0}
1937 | # Slice_239__310
%"slice_52__310"<FLOAT16,?> ⬅️ ::Slice(%"slice_51__310", %"_val_176__310", %"_val_180__310", %"_val_184__310", %"_val_188__310")
1938 | # Constant_240__310
%"_val_190__310"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
1939 | # n0__346
%"size_0__346"<INT64,[5]> ⬅️ ::Cast(%"_val_190__310") {to=7}
1940 | # n1__346
%"size_1__346"<INT64,[5]> ⬅️ ::Abs(%"size_0__346")
1941 | # n2__346
%"expand_6__310"<FLOAT16,?> ⬅️ ::Expand(%"slice_52__310", %"size_1__346")
1942 | # n0__347
%"clone_6__310"<FLOAT16,?> ⬅️ ::Identity(%"expand_6__310")
1943 | # Constant_243__310
%"_val_193__310"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
1944 | # n0__348
%"size_0__348"<INT64,[4]> ⬅️ ::Cast(%"_val_193__310") {to=7}
1945 | # n1__348
%"view_70__310"<FLOAT16,[unk__495,unk__496,unk__497,unk__498]> ⬅️ ::Reshape(%"clone_6__310", %"size_0__348")
1946 | # Constant_245__310
%"_val_195__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1947 | # Cast_246__310
%"_val_196__310"<INT64,?> ⬅️ ::Cast(%"_val_195__310") {to=7}
1948 | # Constant_247__310
%"_val_197__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1949 | # Reshape_248__310
%"_val_198__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_196__310", %"_val_197__310") {allowzero=0}
1950 | # Constant_249__310
%"_val_199__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1951 | # Cast_250__310
%"_val_200__310"<INT64,?> ⬅️ ::Cast(%"_val_199__310") {to=7}
1952 | # Constant_251__310
%"_val_201__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1953 | # Reshape_252__310
%"_val_202__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_200__310", %"_val_201__310") {allowzero=0}
1954 | # Constant_253__310
%"_val_203__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1955 | # Cast_254__310
%"_val_204__310"<INT64,?> ⬅️ ::Cast(%"_val_203__310") {to=7}
1956 | # Constant_255__310
%"_val_205__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1957 | # Reshape_256__310
%"_val_206__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_204__310", %"_val_205__310") {allowzero=0}
1958 | # Constant_257__310
%"_val_207__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1959 | # Cast_258__310
%"_val_208__310"<INT64,?> ⬅️ ::Cast(%"_val_207__310") {to=7}
1960 | # Constant_259__310
%"_val_209__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1961 | # Reshape_260__310
%"_val_210__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_208__310", %"_val_209__310") {allowzero=0}
1962 | # Slice_261__310
%"slice_53__310"<FLOAT16,[unk__499,unk__500,unk__501,unk__502]> ⬅️ ::Slice(%"model_1_6", %"_val_198__310", %"_val_202__310", %"_val_206__310", %"_val_210__310")
1963 | # Constant_262__310
%"_val_212__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1964 | # Cast_263__310
%"_val_213__310"<INT64,?> ⬅️ ::Cast(%"_val_212__310") {to=7}
1965 | # Constant_264__310
%"_val_214__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1966 | # Reshape_265__310
%"_val_215__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_213__310", %"_val_214__310") {allowzero=0}
1967 | # Constant_266__310
%"_val_216__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1968 | # Cast_267__310
%"_val_217__310"<INT64,?> ⬅️ ::Cast(%"_val_216__310") {to=7}
1969 | # Constant_268__310
%"_val_218__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1970 | # Reshape_269__310
%"_val_219__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_217__310", %"_val_218__310") {allowzero=0}
1971 | # Constant_270__310
%"_val_220__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1972 | # Cast_271__310
%"_val_221__310"<INT64,?> ⬅️ ::Cast(%"_val_220__310") {to=7}
1973 | # Constant_272__310
%"_val_222__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1974 | # Reshape_273__310
%"_val_223__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_221__310", %"_val_222__310") {allowzero=0}
1975 | # Constant_274__310
%"_val_224__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1976 | # Cast_275__310
%"_val_225__310"<INT64,?> ⬅️ ::Cast(%"_val_224__310") {to=7}
1977 | # Constant_276__310
%"_val_226__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1978 | # Reshape_277__310
%"_val_227__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_225__310", %"_val_226__310") {allowzero=0}
1979 | # Slice_278__310
%"slice_54__310"<FLOAT16,[unk__503,unk__504,unk__505,unk__506]> ⬅️ ::Slice(%"slice_53__310", %"_val_215__310", %"_val_219__310", %"_val_223__310", %"_val_227__310")
1980 | # n0__349
%"dim__349"<INT64,?> ⬅️ ::Constant() {value_int=2}
1981 | # n1__349
%"dim_0__349"<INT64,?> ⬅️ ::Cast(%"dim__349") {to=7}
1982 | # n2__349
%"unsqueeze_16__310"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_54__310", %"dim_0__349")
1983 | # Constant_280__310
%"_val_230__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1984 | # Cast_281__310
%"_val_231__310"<INT64,?> ⬅️ ::Cast(%"_val_230__310") {to=7}
1985 | # Constant_282__310
%"_val_232__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1986 | # Reshape_283__310
%"_val_233__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_231__310", %"_val_232__310") {allowzero=0}
1987 | # Constant_284__310
%"_val_234__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1988 | # Cast_285__310
%"_val_235__310"<INT64,?> ⬅️ ::Cast(%"_val_234__310") {to=7}
1989 | # Constant_286__310
%"_val_236__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1990 | # Reshape_287__310
%"_val_237__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_235__310", %"_val_236__310") {allowzero=0}
1991 | # Constant_288__310
%"_val_238__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1992 | # Cast_289__310
%"_val_239__310"<INT64,?> ⬅️ ::Cast(%"_val_238__310") {to=7}
1993 | # Constant_290__310
%"_val_240__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1994 | # Reshape_291__310
%"_val_241__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_239__310", %"_val_240__310") {allowzero=0}
1995 | # Constant_292__310
%"_val_242__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
1996 | # Cast_293__310
%"_val_243__310"<INT64,?> ⬅️ ::Cast(%"_val_242__310") {to=7}
1997 | # Constant_294__310
%"_val_244__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
1998 | # Reshape_295__310
%"_val_245__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_243__310", %"_val_244__310") {allowzero=0}
1999 | # Slice_296__310
%"slice_55__310"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_16__310", %"_val_233__310", %"_val_237__310", %"_val_241__310", %"_val_245__310")
2000 | # Constant_297__310
%"_val_247__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2001 | # Cast_298__310
%"_val_248__310"<INT64,?> ⬅️ ::Cast(%"_val_247__310") {to=7}
2002 | # Constant_299__310
%"_val_249__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2003 | # Reshape_300__310
%"_val_250__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_248__310", %"_val_249__310") {allowzero=0}
2004 | # Constant_301__310
%"_val_251__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2005 | # Cast_302__310
%"_val_252__310"<INT64,?> ⬅️ ::Cast(%"_val_251__310") {to=7}
2006 | # Constant_303__310
%"_val_253__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2007 | # Reshape_304__310
%"_val_254__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_252__310", %"_val_253__310") {allowzero=0}
2008 | # Constant_305__310
%"_val_255__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2009 | # Cast_306__310
%"_val_256__310"<INT64,?> ⬅️ ::Cast(%"_val_255__310") {to=7}
2010 | # Constant_307__310
%"_val_257__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2011 | # Reshape_308__310
%"_val_258__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_256__310", %"_val_257__310") {allowzero=0}
2012 | # Constant_309__310
%"_val_259__310"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2013 | # Cast_310__310
%"_val_260__310"<INT64,?> ⬅️ ::Cast(%"_val_259__310") {to=7}
2014 | # Constant_311__310
%"_val_261__310"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2015 | # Reshape_312__310
%"_val_262__310"<INT64,[1]> ⬅️ ::Reshape(%"_val_260__310", %"_val_261__310") {allowzero=0}
2016 | # Slice_313__310
%"slice_56__310"<FLOAT16,?> ⬅️ ::Slice(%"slice_55__310", %"_val_250__310", %"_val_254__310", %"_val_258__310", %"_val_262__310")
2017 | # Constant_314__310
%"_val_264__310"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
2018 | # n0__350
%"size_0__350"<INT64,[5]> ⬅️ ::Cast(%"_val_264__310") {to=7}
2019 | # n1__350
%"size_1__350"<INT64,[5]> ⬅️ ::Abs(%"size_0__350")
2020 | # n2__350
%"expand_7__310"<FLOAT16,?> ⬅️ ::Expand(%"slice_56__310", %"size_1__350")
2021 | # n0__351
%"clone_7__310"<FLOAT16,?> ⬅️ ::Identity(%"expand_7__310")
2022 | # Constant_317__310
%"_val_267__310"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
2023 | # n0__352
%"size_0__352"<INT64,[4]> ⬅️ ::Cast(%"_val_267__310") {to=7}
2024 | # n1__352
%"view_71__310"<FLOAT16,[unk__507,unk__508,unk__509,unk__510]> ⬅️ ::Reshape(%"clone_7__310", %"size_0__352")
2025 | # n0__353
%"tmp__353"<INT64,[unk__511]> ⬅️ ::Shape(%"add_19__310")
2026 | # n1__353
%"int64_m1__353"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_m1')}
2027 | # n2__353
%"tmp_subscripted__353"<INT64,?> ⬅️ ::Gather(%"tmp__353", %"int64_m1__353") {axis=0}
2028 | # n3__353
%"embedding_size__353"<FLOAT16,?> ⬅️ ::CastLike(%"tmp_subscripted__353", %"add_19__310")
2029 | # n4__353
%"const__353"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2030 | # n5__353
%"tmp_0__353"<FLOAT16,?> ⬅️ ::Sqrt(%"embedding_size__353")
2031 | # n6__353
%"const_cast__353"<FLOAT16,?> ⬅️ ::CastLike(%"const__353", %"tmp_0__353")
2032 | # n7__353
%"_val_269__310"<FLOAT16,?> ⬅️ ::Div(%"const_cast__353", %"tmp_0__353")
2033 | # CastLike_320__310
%"_val_270__310"<FLOAT16,?> ⬅️ ::CastLike(%"_val_269__310", %"add_19__310")
2034 | # n0__354
%"tmp__354"<INT64,[unk__512]> ⬅️ ::Shape(%"add_19__310")
2035 | # n1__354
%"int64_0_1d__354"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
2036 | # n2__354
%"int64_1_1d__354"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
2037 | # n3__354
%"int64_m2_1d__354"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
2038 | # n4__354
%"int64_m1_1d__354"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
2039 | # n5__354
%"target_length__354"<INT64,[unk__513]> ⬅️ ::Slice(%"tmp__354", %"int64_m2_1d__354", %"int64_m1_1d__354", %"int64_0_1d__354", %"int64_1_1d__354")
2040 | # n6__354
%"tmp_0__354"<INT64,[4]> ⬅️ ::Shape(%"view_70__310")
2041 | # n7__354
%"int64_0_1d_1__354"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
2042 | # n8__354
%"int64_1_1d_2__354"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
2043 | # n9__354
%"int64_m2_1d_3__354"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_3')}
2044 | # n10__354
%"int64_m1_1d_4__354"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_4')}
2045 | # n11__354
%"source_length__354"<INT64,[1]> ⬅️ ::Slice(%"tmp_0__354", %"int64_m2_1d_3__354", %"int64_m1_1d_4__354", %"int64_0_1d_1__354", %"int64_1_1d_2__354")
2046 | # n12__354
%"size__354"<INT64,[unk__514]> ⬅️ ::Concat(%"target_length__354", %"source_length__354") {axis=0}
2047 | # n13__354
%"const__354"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2048 | # n14__354
%"attn_mask__354"<FLOAT,?> ⬅️ ::Expand(%"const__354", %"size__354")
2049 | # n15__354
%"attn_mask_5__354"<FLOAT,?> ⬅️ ::Trilu(%"attn_mask__354") {upper=0}
2050 | # n16__354
%"const_6__354"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_6')}
2051 | # n17__354
%"const_6_cast__354"<FLOAT,?> ⬅️ ::CastLike(%"const_6__354", %"attn_mask_5__354")
2052 | # n18__354
%"tmp_7__354"<BOOL,?> ⬅️ ::Equal(%"attn_mask_5__354", %"const_6_cast__354")
2053 | # n19__354
%"tmp_8__354"<FLOAT,?> ⬅️ ::Constant() {value_float=-inf}
2054 | # n20__354
%"const_9__354"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
2055 | # n21__354
%"const_9_cast__354"<FLOAT,?> ⬅️ ::CastLike(%"const_9__354", %"tmp_8__354")
2056 | # n22__354
%"attn_mask_10__354"<FLOAT,?> ⬅️ ::Where(%"tmp_7__354", %"tmp_8__354", %"const_9_cast__354")
2057 | # n23__354
%"_val_271__310"<FLOAT16,?> ⬅️ ::CastLike(%"attn_mask_10__354", %"add_19__310")
2058 | # n0__355
%"key_shape__355"<INT64,[4]> ⬅️ ::Shape(%"view_70__310")
2059 | # n1__355
%"int64_0_1d__355"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
2060 | # n2__355
%"int64_1_1d__355"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
2061 | # n3__355
%"int64_m1_1d__355"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
2062 | # n4__355
%"int64_9223372036854775807_1d__355"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_9223372036854775807_1d')}
2063 | # n5__355
%"key_last_dim__355"<INT64,[1]> ⬅️ ::Slice(%"key_shape__355", %"int64_m1_1d__355", %"int64_9223372036854775807_1d__355", %"int64_0_1d__355", %"int64_1_1d__355")
2064 | # n6__355
%"int64_0_1d_0__355"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_0')}
2065 | # n7__355
%"int64_1_1d_1__355"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_1')}
2066 | # n8__355
%"int64_m2_1d__355"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
2067 | # n9__355
%"int64_m1_1d_2__355"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_2')}
2068 | # n10__355
%"key_second_last_dim__355"<INT64,[1]> ⬅️ ::Slice(%"key_shape__355", %"int64_m2_1d__355", %"int64_m1_1d_2__355", %"int64_0_1d_0__355", %"int64_1_1d_1__355")
2069 | # n11__355
%"int64_0_1d_3__355"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
2070 | # n12__355
%"int64_1_1d_4__355"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
2071 | # n13__355
%"int64_m2_1d_5__355"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_5')}
2072 | # n14__355
%"key_first_dims__355"<INT64,[2]> ⬅️ ::Slice(%"key_shape__355", %"int64_0_1d_3__355", %"int64_m2_1d_5__355", %"int64_0_1d_3__355", %"int64_1_1d_4__355")
2073 | # n15__355
%"tmp__355"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2074 | # n16__355
%"key_squeezed_shape__355"<INT64,[3]> ⬅️ ::Concat(%"tmp__355", %"key_second_last_dim__355", %"key_last_dim__355") {axis=0}
2075 | # n17__355
%"key_squeezed__355"<FLOAT16,[unk__515,unk__516,unk__517]> ⬅️ ::Reshape(%"view_70__310", %"key_squeezed_shape__355")
2076 | # n18__355
%"key_squeezed_transposed__355"<FLOAT16,[unk__515,unk__517,unk__516]> ⬅️ ::Transpose(%"key_squeezed__355") {perm=[0, 2, 1]}
2077 | # n19__355
%"key_transposed_shape__355"<INT64,[4]> ⬅️ ::Concat(%"key_first_dims__355", %"key_last_dim__355", %"key_second_last_dim__355") {axis=0}
2078 | # n20__355
%"key_transposed__355"<FLOAT16,[unk__518,unk__519,unk__520,unk__521]> ⬅️ ::Reshape(%"key_squeezed_transposed__355", %"key_transposed_shape__355")
2079 | # n21__355
%"tmp_6__355"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__310")
2080 | # n22__355
%"query_scaled__355"<FLOAT16,?> ⬅️ ::Mul(%"add_19__310", %"tmp_6__355")
2081 | # n23__355
%"tmp_7__355"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__310")
2082 | # n24__355
%"key_transposed_scaled__355"<FLOAT16,[unk__518,unk__519,unk__520,unk__521]> ⬅️ ::Mul(%"key_transposed__355", %"tmp_7__355")
2083 | # n25__355
%"tmp_8__355"<FLOAT16,?> ⬅️ ::MatMul(%"query_scaled__355", %"key_transposed_scaled__355")
2084 | # n26__355
%"tmp_9__355"<FLOAT16,?> ⬅️ ::Add(%"tmp_8__355", %"_val_271__310")
2085 | # n27__355
%"attn_weight__355"<FLOAT16,?> ⬅️ ::Softmax(%"tmp_9__355") {axis=-1}
2086 | # n28__355
%"dropout_p__355"<FLOAT,?> ⬅️ ::Constant() {value_float=0.0}
2087 | # n29__355
%"attn_weight_10__355"<FLOAT16,?>, %"___355"<BOOL,?> ⬅️ ::Dropout(%"attn_weight__355", %"dropout_p__355")
2088 | # n30__355
%"_scaled_dot_product_efficient_attention_3__310"<FLOAT16,?> ⬅️ ::MatMul(%"attn_weight_10__355", %"view_71__310")
2089 | # n0__356
%"query_0__356"<FLOAT16,?> ⬅️ ::Transpose(%"add_19__310") {perm=[0, 2, 1, 3]}
2090 | # n1__356
%"query_shape__356"<INT64,[unk__522]> ⬅️ ::Shape(%"query_0__356")
2091 | # n2__356
%"int64_0_1d__356"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
2092 | # n3__356
%"int64_1_1d__356"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
2093 | # n4__356
%"query_first_dims__356"<INT64,[unk__523]> ⬅️ ::Slice(%"query_shape__356", %"int64_0_1d__356", %"int64_1_1d__356", %"int64_0_1d__356", %"int64_1_1d__356")
2094 | # n5__356
%"int64_0_1d_1__356"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
2095 | # n6__356
%"int64_1_1d_2__356"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
2096 | # n7__356
%"int64_2_1d__356"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_2_1d')}
2097 | # n8__356
%"query_second_dims__356"<INT64,[unk__524]> ⬅️ ::Slice(%"query_shape__356", %"int64_1_1d_2__356", %"int64_2_1d__356", %"int64_0_1d_1__356", %"int64_1_1d_2__356")
2098 | # n9__356
%"int64_0_1d_3__356"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
2099 | # n10__356
%"int64_1_1d_4__356"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
2100 | # n11__356
%"int64_m2_1d__356"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
2101 | # n12__356
%"int64_m1_1d__356"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
2102 | # n13__356
%"num_heads__356"<INT64,[unk__525]> ⬅️ ::Slice(%"query_shape__356", %"int64_m2_1d__356", %"int64_m1_1d__356", %"int64_0_1d_3__356", %"int64_1_1d_4__356")
2103 | # n14__356
%"compute_log_sumexp__356"<INT64,?> ⬅️ ::Constant() {value_int=0}
2104 | # n15__356
%"compute_log_sumexp_as_bool__356"<BOOL,?> ⬅️ ::Cast(%"compute_log_sumexp__356") {to=9}
2105 | # n16__356
%"_scaled_dot_product_efficient_attention_3_1__310"<FLOAT,?> ⬅️ ::If(%"compute_log_sumexp_as_bool__356") {then_branch=
graph(
name=thenGraph_14,
inputs=(
),
outputs=(
%"logsum_exp__356"<FLOAT,?>
),
) {
0 | # n0__356_141
%"tmp__356"<FLOAT,[unk__524]> ⬅️ ::Cast(%"query_second_dims__356") {to=1}
1 | # n1__356_142
%"const__356"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2 | # n2__356_143
%"const_cast__356"<FLOAT,?> ⬅️ ::CastLike(%"const__356", %"tmp__356")
3 | # n3__356_144
%"tmp_5__356"<FLOAT,[unk__524]> ⬅️ ::Div(%"tmp__356", %"const_cast__356")
4 | # n4__356_145
%"tmp_6__356"<FLOAT,[unk__524]> ⬅️ ::Ceil(%"tmp_5__356")
5 | # n5__356_146
%"const_7__356"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_7')}
6 | # n6__356_147
%"const_7_cast__356"<FLOAT,?> ⬅️ ::CastLike(%"const_7__356", %"tmp_6__356")
7 | # n7__356_148
%"tmp_8__356"<FLOAT,[unk__524]> ⬅️ ::Mul(%"tmp_6__356", %"const_7_cast__356")
8 | # n8__356_149
%"logsumexp_dim__356"<INT64,[unk__524]> ⬅️ ::Cast(%"tmp_8__356") {to=7}
9 | # n9__356_150
%"const_9__356"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
10 | # n10__356_151
%"tmp_10__356"<INT64,[unk__526]> ⬅️ ::Concat(%"query_first_dims__356", %"num_heads__356", %"logsumexp_dim__356") {axis=0}
11 | # n11__356_152
%"logsum_exp__356"<FLOAT,?> ⬅️ ::Expand(%"const_9__356", %"tmp_10__356")
return %"logsum_exp__356"<FLOAT,?>
}, else_branch=
graph(
name=elseGraph_14,
inputs=(
),
outputs=(
%"logsum_exp_14__356"<FLOAT,?>
),
) {
0 | # n0__356_153
%"const_11__356"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_11')}
1 | # n1__356_154
%"int64_0_1d_12__356"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_12')}
2 | # n2__356_155
%"int64_0_1d_12_cast__356"<INT64,[1]> ⬅️ ::CastLike(%"int64_0_1d_12__356", %"num_heads__356")
3 | # n3__356_156
%"tmp_13__356"<INT64,[unk__527]> ⬅️ ::Concat(%"query_first_dims__356", %"num_heads__356", %"int64_0_1d_12_cast__356") {axis=0}
4 | # n4__356_157
%"logsum_exp_14__356"<FLOAT,?> ⬅️ ::Expand(%"const_11__356", %"tmp_13__356")
return %"logsum_exp_14__356"<FLOAT,?>
}}
2106 | # n17__356
%"tmp_16__356"<INT64,[0]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[0]>('Empty_INTS')}
2107 | # n18__356
%"tmp_17__356"<FLOAT,?> ⬅️ ::ConstantOfShape(%"tmp_16__356")
2108 | # n19__356
%"_scaled_dot_product_efficient_attention_3_3__310"<INT64,?> ⬅️ ::Cast(%"tmp_17__356") {to=7}
2109 | # Transpose_324__310
%"transpose_15__310"<FLOAT16,?> ⬅️ ::Transpose(%"_scaled_dot_product_efficient_attention_3__310") {perm=[0, 2, 1, 3]}
2110 | # Constant_325__310
%"_val_276__310"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2111 | # n0__357
%"size_0__357"<INT64,[3]> ⬅️ ::Cast(%"_val_276__310") {to=7}
2112 | # n1__357
%"view_72__310"<FLOAT16,[unk__528,unk__529,unk__530]> ⬅️ ::Reshape(%"transpose_15__310", %"size_0__357")
2113 | # n0__360
%"tmp__360"<INT64,[2]> ⬅️ ::Shape(%"model.layers.3.self_attn.o_proj.weight")
2114 | # n1__360
%"rank__359"<INT64,?> ⬅️ ::Size(%"tmp__360")
2115 | # n1__359
%"int64_2__359"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2116 | # n2__359
%"int64_2_cast__359"<INT64,?> ⬅️ ::CastLike(%"int64_2__359", %"rank__359")
2117 | # n3__359
%"cond__359"<BOOL,?> ⬅️ ::Equal(%"rank__359", %"int64_2_cast__359")
2118 | # n4__359
%"t_24__358"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__359") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__359"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__359_158
%"result__359"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.3.self_attn.o_proj.weight") {perm=[1, 0]}
return %"result__359"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__359"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__359_159
%"result_0__359"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.3.self_attn.o_proj.weight")
return %"result_0__359"<FLOAT16,[4096,4096]>
}}
2119 | # Constant_3__358
%"_val_3__358"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2120 | # n0__361
%"size_0__361"<INT64,[2]> ⬅️ ::Cast(%"_val_3__358") {to=7}
2121 | # n1__361
%"view_73__358"<FLOAT16,[unk__531,unk__532]> ⬅️ ::Reshape(%"view_72__310", %"size_0__361")
2122 | # n0__362
%"mm_24__358"<FLOAT16,[unk__531,4096]> ⬅️ ::MatMul(%"view_73__358", %"t_24__358")
2123 | # Constant_6__358
%"_val_6__358"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2124 | # n0__363
%"size_0__363"<INT64,[3]> ⬅️ ::Cast(%"_val_6__358") {to=7}
2125 | # n1__363
%"model_layers_3_self_attn_1_2__300"<FLOAT16,[unk__533,unk__534,unk__535]> ⬅️ ::Reshape(%"mm_24__358", %"size_0__363")
2126 | # n0__364
%"alpha__364"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
2127 | # n1__364
%"alpha_0__364"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__364", %"model_layers_3_self_attn_1_2__300")
2128 | # n2__364
%"other_1__364"<FLOAT16,[unk__533,unk__534,unk__535]> ⬅️ ::Mul(%"model_layers_3_self_attn_1_2__300", %"alpha_0__364")
2129 | # n3__364
%"add_21__300"<FLOAT16,[unk__536,128,4096]> ⬅️ ::Add(%"model_layers_2_1_2__1", %"other_1__364")
2130 | # Cast_3__365
%"_to_copy_20__365"<FLOAT,[unk__536,128,4096]> ⬅️ ::Cast(%"add_21__300") {to=1}
2131 | # Constant_4__365
%"_val_2__365"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2132 | # Cast_5__365
%"scalar_tensor_default_7__365"<FLOAT,?> ⬅️ ::Cast(%"_val_2__365") {to=1}
2133 | # n0__366
%"pow_8__365"<FLOAT,[unk__536,128,4096]> ⬅️ ::Pow(%"_to_copy_20__365", %"scalar_tensor_default_7__365")
2134 | # Constant_7__365
%"_val_5__365"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
2135 | # n0__368
%"tmp__368"<INT64,[3]> ⬅️ ::Shape(%"pow_8__365")
2136 | # n1__368
%"tmp_0__368"<INT64,?> ⬅️ ::Size(%"tmp__368")
2137 | # n2__368
%"tmp_1__368"<INT64,?> ⬅️ ::Constant() {value_int=0}
2138 | # n3__368
%"cond__367"<BOOL,?> ⬅️ ::Equal(%"tmp_0__368", %"tmp_1__368")
2139 | # n1__367
%"mean_7__365"<FLOAT,?> ⬅️ ::If(%"cond__367") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__367"<FLOAT,[unk__536,128,4096]>
),
) {
0 | # n0__367_160
%"result__367"<FLOAT,[unk__536,128,4096]> ⬅️ ::Identity(%"pow_8__365")
return %"result__367"<FLOAT,[unk__536,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__367"<FLOAT,?>
),
) {
0 | # n0__369
%"tmp__369"<INT64,[1]> ⬅️ ::Shape(%"_val_5__365")
1 | # n1__369
%"tmp_0__369"<INT64,?> ⬅️ ::Size(%"tmp__369")
2 | # n2__369
%"tmp_1__369"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__369
%"cond_0__367"<BOOL,?> ⬅️ ::Equal(%"tmp_0__369", %"tmp_1__369")
4 | # n1__367_162
%"dim_3__367"<INT64,?> ⬅️ ::If(%"cond_0__367") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__367"<INT64,[1,1]>
),
) {
0 | # n0__367_163
%"int64_0__367"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__367_164
%"dim_1__367"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__365", %"int64_0__367")
return %"dim_1__367"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__367"<INT64,[1]>
),
) {
0 | # n0__367_165
%"dim_2__367"<INT64,[1]> ⬅️ ::Identity(%"_val_5__365")
return %"dim_2__367"<INT64,[1]>
}}
5 | # n2__367
%"result_4__367"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_8__365", %"dim_3__367") {keepdims=1}
return %"result_4__367"<FLOAT,?>
}}
2140 | # Constant_9__365
%"_val_7__365"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
2141 | # n0__370
%"alpha__370"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
2142 | # n1__370
%"alpha_0__370"<FLOAT,?> ⬅️ ::CastLike(%"alpha__370", %"_val_7__365")
2143 | # n2__370
%"other_1__370"<FLOAT,?> ⬅️ ::Mul(%"_val_7__365", %"alpha_0__370")
2144 | # n3__370
%"add_22__365"<FLOAT,?> ⬅️ ::Add(%"mean_7__365", %"other_1__370")
2145 | # n0__371
%"tmp__371"<FLOAT,?> ⬅️ ::Sqrt(%"add_22__365")
2146 | # n1__371
%"rsqrt_7__365"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__371")
2147 | # n0__372
%"mul_36__365"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_20__365", %"rsqrt_7__365")
2148 | # Cast_13__365
%"_to_copy_21__365"<FLOAT16,?> ⬅️ ::Cast(%"mul_36__365") {to=10}
2149 | # n0__373
%"model_layers_3_post_attention_layernorm_1__300"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.3.post_attention_layernorm.weight", %"_to_copy_21__365")
2150 | # n0__377
%"tmp__377"<INT64,[2]> ⬅️ ::Shape(%"model.layers.3.mlp.gate_proj.weight")
2151 | # n1__377
%"rank__376"<INT64,?> ⬅️ ::Size(%"tmp__377")
2152 | # n1__376
%"int64_2__376"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2153 | # n2__376
%"int64_2_cast__376"<INT64,?> ⬅️ ::CastLike(%"int64_2__376", %"rank__376")
2154 | # n3__376
%"cond__376"<BOOL,?> ⬅️ ::Equal(%"rank__376", %"int64_2_cast__376")
2155 | # n4__376
%"t_25__375"<FLOAT16,[unk__537,unk__538]> ⬅️ ::If(%"cond__376") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__376"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__376_166
%"result__376"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.3.mlp.gate_proj.weight") {perm=[1, 0]}
return %"result__376"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__376"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__376_167
%"result_0__376"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.3.mlp.gate_proj.weight")
return %"result_0__376"<FLOAT16,[14336,4096]>
}}
2156 | # Constant_3__375
%"_val_3__375"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2157 | # n0__378
%"size_0__378"<INT64,[2]> ⬅️ ::Cast(%"_val_3__375") {to=7}
2158 | # n1__378
%"view_75__375"<FLOAT16,[unk__539,unk__540]> ⬅️ ::Reshape(%"model_layers_3_post_attention_layernorm_1__300", %"size_0__378")
2159 | # n0__379
%"mm_25__375"<FLOAT16,[unk__539,unk__538]> ⬅️ ::MatMul(%"view_75__375", %"t_25__375")
2160 | # Constant_6__375
%"_val_6__375"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2161 | # n0__380
%"size_0__380"<INT64,[3]> ⬅️ ::Cast(%"_val_6__375") {to=7}
2162 | # n1__380
%"model_layers_3_mlp_gate_proj_1__374"<FLOAT16,[unk__541,unk__542,unk__543]> ⬅️ ::Reshape(%"mm_25__375", %"size_0__380")
2163 | # Cast_0__381
%"_to_copy_22__381"<FLOAT,[unk__541,unk__542,unk__543]> ⬅️ ::Cast(%"model_layers_3_mlp_gate_proj_1__374") {to=1}
2164 | # n0__382
%"sigmoid_3__381"<FLOAT,[unk__541,unk__542,unk__543]> ⬅️ ::Sigmoid(%"_to_copy_22__381")
2165 | # n0__383
%"mul_38__381"<FLOAT,[unk__541,unk__542,unk__543]> ⬅️ ::Mul(%"_to_copy_22__381", %"sigmoid_3__381")
2166 | # Cast_3__381
%"model_layers_3_mlp_act_fn_1__374"<FLOAT16,[unk__541,unk__542,unk__543]> ⬅️ ::Cast(%"mul_38__381") {to=10}
2167 | # n0__386
%"tmp__386"<INT64,[2]> ⬅️ ::Shape(%"model.layers.3.mlp.up_proj.weight")
2168 | # n1__386
%"rank__385"<INT64,?> ⬅️ ::Size(%"tmp__386")
2169 | # n1__385
%"int64_2__385"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2170 | # n2__385
%"int64_2_cast__385"<INT64,?> ⬅️ ::CastLike(%"int64_2__385", %"rank__385")
2171 | # n3__385
%"cond__385"<BOOL,?> ⬅️ ::Equal(%"rank__385", %"int64_2_cast__385")
2172 | # n4__385
%"t_26__384"<FLOAT16,[unk__544,unk__545]> ⬅️ ::If(%"cond__385") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__385"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__385_168
%"result__385"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.3.mlp.up_proj.weight") {perm=[1, 0]}
return %"result__385"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__385"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__385_169
%"result_0__385"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.3.mlp.up_proj.weight")
return %"result_0__385"<FLOAT16,[14336,4096]>
}}
2173 | # Constant_3__384
%"_val_3__384"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2174 | # n0__387
%"size_0__387"<INT64,[2]> ⬅️ ::Cast(%"_val_3__384") {to=7}
2175 | # n1__387
%"view_77__384"<FLOAT16,[unk__546,unk__547]> ⬅️ ::Reshape(%"model_layers_3_post_attention_layernorm_1__300", %"size_0__387")
2176 | # n0__388
%"mm_26__384"<FLOAT16,[unk__546,unk__545]> ⬅️ ::MatMul(%"view_77__384", %"t_26__384")
2177 | # Constant_6__384
%"_val_6__384"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2178 | # n0__389
%"size_0__389"<INT64,[3]> ⬅️ ::Cast(%"_val_6__384") {to=7}
2179 | # n1__389
%"model_layers_3_mlp_up_proj_1__374"<FLOAT16,[unk__548,unk__549,unk__550]> ⬅️ ::Reshape(%"mm_26__384", %"size_0__389")
2180 | # n0__390
%"mul_39__374"<FLOAT16,[unk__551,unk__552,unk__553]> ⬅️ ::Mul(%"model_layers_3_mlp_act_fn_1__374", %"model_layers_3_mlp_up_proj_1__374")
2181 | # n0__393
%"tmp__393"<INT64,[2]> ⬅️ ::Shape(%"model.layers.3.mlp.down_proj.weight")
2182 | # n1__393
%"rank__392"<INT64,?> ⬅️ ::Size(%"tmp__393")
2183 | # n1__392
%"int64_2__392"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2184 | # n2__392
%"int64_2_cast__392"<INT64,?> ⬅️ ::CastLike(%"int64_2__392", %"rank__392")
2185 | # n3__392
%"cond__392"<BOOL,?> ⬅️ ::Equal(%"rank__392", %"int64_2_cast__392")
2186 | # n4__392
%"t_27__391"<FLOAT16,[unk__554,unk__555]> ⬅️ ::If(%"cond__392") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__392"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__392_170
%"result__392"<FLOAT16,[14336,4096]> ⬅️ ::Transpose(%"model.layers.3.mlp.down_proj.weight") {perm=[1, 0]}
return %"result__392"<FLOAT16,[14336,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__392"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__392_171
%"result_0__392"<FLOAT16,[4096,14336]> ⬅️ ::Identity(%"model.layers.3.mlp.down_proj.weight")
return %"result_0__392"<FLOAT16,[4096,14336]>
}}
2187 | # Constant_3__391
%"_val_3__391"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2188 | # n0__394
%"size_0__394"<INT64,[2]> ⬅️ ::Cast(%"_val_3__391") {to=7}
2189 | # n1__394
%"view_79__391"<FLOAT16,[unk__556,unk__557]> ⬅️ ::Reshape(%"mul_39__374", %"size_0__394")
2190 | # n0__395
%"mm_27__391"<FLOAT16,[unk__556,unk__555]> ⬅️ ::MatMul(%"view_79__391", %"t_27__391")
2191 | # Constant_6__391
%"_val_6__391"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2192 | # n0__396
%"size_0__396"<INT64,[3]> ⬅️ ::Cast(%"_val_6__391") {to=7}
2193 | # n1__396
%"model_layers_3_mlp_1__300"<FLOAT16,[unk__558,unk__559,unk__560]> ⬅️ ::Reshape(%"mm_27__391", %"size_0__396")
2194 | # n0__397
%"alpha__397"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
2195 | # n1__397
%"alpha_0__397"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__397", %"model_layers_3_mlp_1__300")
2196 | # n2__397
%"other_1__397"<FLOAT16,[unk__558,unk__559,unk__560]> ⬅️ ::Mul(%"model_layers_3_mlp_1__300", %"alpha_0__397")
2197 | # n3__397
%"model_layers_3_1_2__1"<FLOAT16,[unk__561,128,4096]> ⬅️ ::Add(%"add_21__300", %"other_1__397")
2198 | # Cast_3__399
%"_to_copy_24__399"<FLOAT,[unk__561,128,4096]> ⬅️ ::Cast(%"model_layers_3_1_2__1") {to=1}
2199 | # Constant_4__399
%"_val_2__399"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2200 | # Cast_5__399
%"scalar_tensor_default_8__399"<FLOAT,?> ⬅️ ::Cast(%"_val_2__399") {to=1}
2201 | # n0__400
%"pow_9__399"<FLOAT,[unk__561,128,4096]> ⬅️ ::Pow(%"_to_copy_24__399", %"scalar_tensor_default_8__399")
2202 | # Constant_7__399
%"_val_5__399"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
2203 | # n0__402
%"tmp__402"<INT64,[3]> ⬅️ ::Shape(%"pow_9__399")
2204 | # n1__402
%"tmp_0__402"<INT64,?> ⬅️ ::Size(%"tmp__402")
2205 | # n2__402
%"tmp_1__402"<INT64,?> ⬅️ ::Constant() {value_int=0}
2206 | # n3__402
%"cond__401"<BOOL,?> ⬅️ ::Equal(%"tmp_0__402", %"tmp_1__402")
2207 | # n1__401
%"mean_8__399"<FLOAT,?> ⬅️ ::If(%"cond__401") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__401"<FLOAT,[unk__561,128,4096]>
),
) {
0 | # n0__401_172
%"result__401"<FLOAT,[unk__561,128,4096]> ⬅️ ::Identity(%"pow_9__399")
return %"result__401"<FLOAT,[unk__561,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__401"<FLOAT,?>
),
) {
0 | # n0__403
%"tmp__403"<INT64,[1]> ⬅️ ::Shape(%"_val_5__399")
1 | # n1__403
%"tmp_0__403"<INT64,?> ⬅️ ::Size(%"tmp__403")
2 | # n2__403
%"tmp_1__403"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__403
%"cond_0__401"<BOOL,?> ⬅️ ::Equal(%"tmp_0__403", %"tmp_1__403")
4 | # n1__401_174
%"dim_3__401"<INT64,?> ⬅️ ::If(%"cond_0__401") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__401"<INT64,[1,1]>
),
) {
0 | # n0__401_175
%"int64_0__401"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__401_176
%"dim_1__401"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__399", %"int64_0__401")
return %"dim_1__401"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__401"<INT64,[1]>
),
) {
0 | # n0__401_177
%"dim_2__401"<INT64,[1]> ⬅️ ::Identity(%"_val_5__399")
return %"dim_2__401"<INT64,[1]>
}}
5 | # n2__401
%"result_4__401"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_9__399", %"dim_3__401") {keepdims=1}
return %"result_4__401"<FLOAT,?>
}}
2208 | # Constant_9__399
%"_val_7__399"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
2209 | # n0__404
%"alpha__404"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
2210 | # n1__404
%"alpha_0__404"<FLOAT,?> ⬅️ ::CastLike(%"alpha__404", %"_val_7__399")
2211 | # n2__404
%"other_1__404"<FLOAT,?> ⬅️ ::Mul(%"_val_7__399", %"alpha_0__404")
2212 | # n3__404
%"add_24__399"<FLOAT,?> ⬅️ ::Add(%"mean_8__399", %"other_1__404")
2213 | # n0__405
%"tmp__405"<FLOAT,?> ⬅️ ::Sqrt(%"add_24__399")
2214 | # n1__405
%"rsqrt_8__399"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__405")
2215 | # n0__406
%"mul_40__399"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_24__399", %"rsqrt_8__399")
2216 | # Cast_13__399
%"_to_copy_25__399"<FLOAT16,?> ⬅️ ::Cast(%"mul_40__399") {to=10}
2217 | # n0__407
%"model_layers_4_input_layernorm_1__398"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.4.input_layernorm.weight", %"_to_copy_25__399")
2218 | # n0__411
%"tmp__411"<INT64,[2]> ⬅️ ::Shape(%"model.layers.4.self_attn.q_proj.weight")
2219 | # n1__411
%"rank__410"<INT64,?> ⬅️ ::Size(%"tmp__411")
2220 | # n1__410
%"int64_2__410"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2221 | # n2__410
%"int64_2_cast__410"<INT64,?> ⬅️ ::CastLike(%"int64_2__410", %"rank__410")
2222 | # n3__410
%"cond__410"<BOOL,?> ⬅️ ::Equal(%"rank__410", %"int64_2_cast__410")
2223 | # n4__410
%"t_28__409"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__410") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__410"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__410_178
%"result__410"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.4.self_attn.q_proj.weight") {perm=[1, 0]}
return %"result__410"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__410"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__410_179
%"result_0__410"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.4.self_attn.q_proj.weight")
return %"result_0__410"<FLOAT16,[4096,4096]>
}}
2224 | # Constant_3__409
%"_val_3__409"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2225 | # n0__412
%"size_0__412"<INT64,[2]> ⬅️ ::Cast(%"_val_3__409") {to=7}
2226 | # n1__412
%"view_81__409"<FLOAT16,[unk__562,unk__563]> ⬅️ ::Reshape(%"model_layers_4_input_layernorm_1__398", %"size_0__412")
2227 | # n0__413
%"mm_28__409"<FLOAT16,[unk__562,4096]> ⬅️ ::MatMul(%"view_81__409", %"t_28__409")
2228 | # Constant_6__409
%"_val_6__409"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2229 | # n0__414
%"size_0__414"<INT64,[3]> ⬅️ ::Cast(%"_val_6__409") {to=7}
2230 | # n1__414
%"model_layers_4_self_attn_q_proj_1__408"<FLOAT16,[unk__564,unk__565,unk__566]> ⬅️ ::Reshape(%"mm_28__409", %"size_0__414")
2231 | # n0__417
%"tmp__417"<INT64,[2]> ⬅️ ::Shape(%"model.layers.4.self_attn.k_proj.weight")
2232 | # n1__417
%"rank__416"<INT64,?> ⬅️ ::Size(%"tmp__417")
2233 | # n1__416
%"int64_2__416"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2234 | # n2__416
%"int64_2_cast__416"<INT64,?> ⬅️ ::CastLike(%"int64_2__416", %"rank__416")
2235 | # n3__416
%"cond__416"<BOOL,?> ⬅️ ::Equal(%"rank__416", %"int64_2_cast__416")
2236 | # n4__416
%"t_29__415"<FLOAT16,[unk__567,unk__568]> ⬅️ ::If(%"cond__416") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__416"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__416_180
%"result__416"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.4.self_attn.k_proj.weight") {perm=[1, 0]}
return %"result__416"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__416"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__416_181
%"result_0__416"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.4.self_attn.k_proj.weight")
return %"result_0__416"<FLOAT16,[1024,4096]>
}}
2237 | # Constant_3__415
%"_val_3__415"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2238 | # n0__418
%"size_0__418"<INT64,[2]> ⬅️ ::Cast(%"_val_3__415") {to=7}
2239 | # n1__418
%"view_83__415"<FLOAT16,[unk__569,unk__570]> ⬅️ ::Reshape(%"model_layers_4_input_layernorm_1__398", %"size_0__418")
2240 | # n0__419
%"mm_29__415"<FLOAT16,[unk__569,unk__568]> ⬅️ ::MatMul(%"view_83__415", %"t_29__415")
2241 | # Constant_6__415
%"_val_6__415"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2242 | # n0__420
%"size_0__420"<INT64,[3]> ⬅️ ::Cast(%"_val_6__415") {to=7}
2243 | # n1__420
%"model_layers_4_self_attn_k_proj_1__408"<FLOAT16,[unk__571,unk__572,unk__573]> ⬅️ ::Reshape(%"mm_29__415", %"size_0__420")
2244 | # n0__423
%"tmp__423"<INT64,[2]> ⬅️ ::Shape(%"model.layers.4.self_attn.v_proj.weight")
2245 | # n1__423
%"rank__422"<INT64,?> ⬅️ ::Size(%"tmp__423")
2246 | # n1__422
%"int64_2__422"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2247 | # n2__422
%"int64_2_cast__422"<INT64,?> ⬅️ ::CastLike(%"int64_2__422", %"rank__422")
2248 | # n3__422
%"cond__422"<BOOL,?> ⬅️ ::Equal(%"rank__422", %"int64_2_cast__422")
2249 | # n4__422
%"t_30__421"<FLOAT16,[unk__574,unk__575]> ⬅️ ::If(%"cond__422") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__422"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__422_182
%"result__422"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.4.self_attn.v_proj.weight") {perm=[1, 0]}
return %"result__422"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__422"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__422_183
%"result_0__422"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.4.self_attn.v_proj.weight")
return %"result_0__422"<FLOAT16,[1024,4096]>
}}
2250 | # Constant_3__421
%"_val_3__421"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2251 | # n0__424
%"size_0__424"<INT64,[2]> ⬅️ ::Cast(%"_val_3__421") {to=7}
2252 | # n1__424
%"view_85__421"<FLOAT16,[unk__576,unk__577]> ⬅️ ::Reshape(%"model_layers_4_input_layernorm_1__398", %"size_0__424")
2253 | # n0__425
%"mm_30__421"<FLOAT16,[unk__576,unk__575]> ⬅️ ::MatMul(%"view_85__421", %"t_30__421")
2254 | # Constant_6__421
%"_val_6__421"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2255 | # n0__426
%"size_0__426"<INT64,[3]> ⬅️ ::Cast(%"_val_6__421") {to=7}
2256 | # n1__426
%"model_layers_4_self_attn_v_proj_1__408"<FLOAT16,[unk__578,unk__579,unk__580]> ⬅️ ::Reshape(%"mm_30__421", %"size_0__426")
2257 | # Constant_61__408
%"_val_8__408"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
2258 | # n0__427
%"size_0__427"<INT64,[4]> ⬅️ ::Cast(%"_val_8__408") {to=7}
2259 | # n1__427
%"view_87__408"<FLOAT16,[unk__581,unk__582,unk__583,unk__584]> ⬅️ ::Reshape(%"model_layers_4_self_attn_q_proj_1__408", %"size_0__427")
2260 | # Transpose_63__408
%"transpose_16__408"<FLOAT16,[unk__581,unk__583,unk__582,unk__584]> ⬅️ ::Transpose(%"view_87__408") {perm=[0, 2, 1, 3]}
2261 | # Constant_64__408
%"_val_11__408"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
2262 | # n0__428
%"size_0__428"<INT64,[4]> ⬅️ ::Cast(%"_val_11__408") {to=7}
2263 | # n1__428
%"view_88__408"<FLOAT16,[unk__585,unk__586,unk__587,unk__588]> ⬅️ ::Reshape(%"model_layers_4_self_attn_k_proj_1__408", %"size_0__428")
2264 | # Transpose_66__408
%"transpose_17__408"<FLOAT16,[unk__585,unk__587,unk__586,unk__588]> ⬅️ ::Transpose(%"view_88__408") {perm=[0, 2, 1, 3]}
2265 | # Constant_67__408
%"_val_14__408"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
2266 | # n0__429
%"size_0__429"<INT64,[4]> ⬅️ ::Cast(%"_val_14__408") {to=7}
2267 | # n1__429
%"view_89__408"<FLOAT16,[unk__589,unk__590,unk__591,unk__592]> ⬅️ ::Reshape(%"model_layers_4_self_attn_v_proj_1__408", %"size_0__429")
2268 | # Transpose_69__408
%"model_1_8"<FLOAT16,[1,8,128,128]> ⬅️ ::Transpose(%"view_89__408") {perm=[0, 2, 1, 3]}
2269 | # Constant_8__430
%"_val_1__430"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2270 | # Cast_9__430
%"_val_2__430"<INT64,?> ⬅️ ::Cast(%"_val_1__430") {to=7}
2271 | # Constant_10__430
%"_val_3__430"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2272 | # Reshape_11__430
%"_val_4__430"<INT64,[1]> ⬅️ ::Reshape(%"_val_2__430", %"_val_3__430") {allowzero=0}
2273 | # Constant_12__430
%"_val_5__430"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2274 | # Cast_13__430
%"_val_6__430"<INT64,?> ⬅️ ::Cast(%"_val_5__430") {to=7}
2275 | # Constant_14__430
%"_val_7__430"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2276 | # Reshape_15__430
%"_val_8__430"<INT64,[1]> ⬅️ ::Reshape(%"_val_6__430", %"_val_7__430") {allowzero=0}
2277 | # Constant_16__430
%"_val_9__430"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2278 | # Cast_17__430
%"_val_10__430"<INT64,?> ⬅️ ::Cast(%"_val_9__430") {to=7}
2279 | # Constant_18__430
%"_val_11__430"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2280 | # Reshape_19__430
%"_val_12__430"<INT64,[1]> ⬅️ ::Reshape(%"_val_10__430", %"_val_11__430") {allowzero=0}
2281 | # Constant_20__430
%"_val_13__430"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2282 | # Cast_21__430
%"_val_14__430"<INT64,?> ⬅️ ::Cast(%"_val_13__430") {to=7}
2283 | # Constant_22__430
%"_val_15__430"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2284 | # Reshape_23__430
%"_val_16__430"<INT64,[1]> ⬅️ ::Reshape(%"_val_14__430", %"_val_15__430") {allowzero=0}
2285 | # Slice_24__430
%"model_layers_4_self_attn_rotary_emb_1__408"<FLOAT16,[unk__593,unk__594]> ⬅️ ::Slice(%"model.layers.4.self_attn.rotary_emb.cos_cached", %"_val_4__430", %"_val_8__430", %"_val_12__430", %"_val_16__430")
2286 | # Constant_25__430
%"_val_19__430"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2287 | # Cast_26__430
%"_val_20__430"<INT64,?> ⬅️ ::Cast(%"_val_19__430") {to=7}
2288 | # Constant_27__430
%"_val_21__430"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2289 | # Reshape_28__430
%"_val_22__430"<INT64,[1]> ⬅️ ::Reshape(%"_val_20__430", %"_val_21__430") {allowzero=0}
2290 | # Constant_29__430
%"_val_23__430"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2291 | # Cast_30__430
%"_val_24__430"<INT64,?> ⬅️ ::Cast(%"_val_23__430") {to=7}
2292 | # Constant_31__430
%"_val_25__430"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2293 | # Reshape_32__430
%"_val_26__430"<INT64,[1]> ⬅️ ::Reshape(%"_val_24__430", %"_val_25__430") {allowzero=0}
2294 | # Constant_33__430
%"_val_27__430"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2295 | # Cast_34__430
%"_val_28__430"<INT64,?> ⬅️ ::Cast(%"_val_27__430") {to=7}
2296 | # Constant_35__430
%"_val_29__430"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2297 | # Reshape_36__430
%"_val_30__430"<INT64,[1]> ⬅️ ::Reshape(%"_val_28__430", %"_val_29__430") {allowzero=0}
2298 | # Constant_37__430
%"_val_31__430"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2299 | # Cast_38__430
%"_val_32__430"<INT64,?> ⬅️ ::Cast(%"_val_31__430") {to=7}
2300 | # Constant_39__430
%"_val_33__430"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2301 | # Reshape_40__430
%"_val_34__430"<INT64,[1]> ⬅️ ::Reshape(%"_val_32__430", %"_val_33__430") {allowzero=0}
2302 | # Slice_41__430
%"model_layers_4_self_attn_rotary_emb_1_1__408"<FLOAT16,[unk__595,unk__596]> ⬅️ ::Slice(%"model.layers.4.self_attn.rotary_emb.sin_cached", %"_val_22__430", %"_val_26__430", %"_val_30__430", %"_val_34__430")
2303 | # Transpose_71__408
%"_val_21__408"<FLOAT16,[unk__593,unk__594]> ⬅️ ::Transpose(%"model_layers_4_self_attn_rotary_emb_1__408") {perm=[0, 1]}
2304 | # Max_72__408
%"_val_22__408"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
2305 | # Shape_73__408
%"_val_23__408"<INT64,[2]> ⬅️ ::Shape(%"_val_22__408") {start=0}
2306 | # Expand_74__408
%"_val_24__408"<INT64,[unk__597,unk__598]> ⬅️ ::Expand(%"view__1", %"_val_23__408")
2307 | # Constant_75__408
%"_val_25__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2308 | # Unsqueeze_76__408
%"_val_26__408"<INT64,[unk__597,unk__598,1]> ⬅️ ::Unsqueeze(%"_val_24__408", %"_val_25__408")
2309 | # Concat_77__408
%"_val_27__408"<INT64,[unk__597,unk__598,1]> ⬅️ ::Concat(%"_val_26__408") {axis=-1}
2310 | # GatherND_78__408
%"_val_28__408"<FLOAT16,[unk__597,unk__598,unk__594]> ⬅️ ::GatherND(%"_val_21__408", %"_val_27__408") {batch_dims=0}
2311 | # Transpose_79__408
%"index_8__408"<FLOAT16,[unk__597,unk__598,unk__594]> ⬅️ ::Transpose(%"_val_28__408") {perm=[0, 1, 2]}
2312 | # n0__431
%"dim__431"<INT64,?> ⬅️ ::Constant() {value_int=1}
2313 | # n1__431
%"dim_0__431"<INT64,?> ⬅️ ::Cast(%"dim__431") {to=7}
2314 | # n2__431
%"unsqueeze_17__408"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_8__408", %"dim_0__431")
2315 | # Transpose_81__408
%"_val_31__408"<FLOAT16,[unk__595,unk__596]> ⬅️ ::Transpose(%"model_layers_4_self_attn_rotary_emb_1_1__408") {perm=[0, 1]}
2316 | # Max_82__408
%"_val_32__408"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
2317 | # Shape_83__408
%"_val_33__408"<INT64,[2]> ⬅️ ::Shape(%"_val_32__408") {start=0}
2318 | # Expand_84__408
%"_val_34__408"<INT64,[unk__599,unk__600]> ⬅️ ::Expand(%"view__1", %"_val_33__408")
2319 | # Constant_85__408
%"_val_35__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2320 | # Unsqueeze_86__408
%"_val_36__408"<INT64,[unk__599,unk__600,1]> ⬅️ ::Unsqueeze(%"_val_34__408", %"_val_35__408")
2321 | # Concat_87__408
%"_val_37__408"<INT64,[unk__599,unk__600,1]> ⬅️ ::Concat(%"_val_36__408") {axis=-1}
2322 | # GatherND_88__408
%"_val_38__408"<FLOAT16,[unk__599,unk__600,unk__596]> ⬅️ ::GatherND(%"_val_31__408", %"_val_37__408") {batch_dims=0}
2323 | # Transpose_89__408
%"index_9__408"<FLOAT16,[unk__599,unk__600,unk__596]> ⬅️ ::Transpose(%"_val_38__408") {perm=[0, 1, 2]}
2324 | # n0__432
%"dim__432"<INT64,?> ⬅️ ::Constant() {value_int=1}
2325 | # n1__432
%"dim_0__432"<INT64,?> ⬅️ ::Cast(%"dim__432") {to=7}
2326 | # n2__432
%"unsqueeze_18__408"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_9__408", %"dim_0__432")
2327 | # n0__433
%"mul_42__408"<FLOAT16,?> ⬅️ ::Mul(%"transpose_16__408", %"unsqueeze_17__408")
2328 | # Constant_92__408
%"_val_42__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2329 | # Cast_93__408
%"_val_43__408"<INT64,?> ⬅️ ::Cast(%"_val_42__408") {to=7}
2330 | # Constant_94__408
%"_val_44__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2331 | # Reshape_95__408
%"_val_45__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_43__408", %"_val_44__408") {allowzero=0}
2332 | # Constant_96__408
%"_val_46__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2333 | # Cast_97__408
%"_val_47__408"<INT64,?> ⬅️ ::Cast(%"_val_46__408") {to=7}
2334 | # Constant_98__408
%"_val_48__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2335 | # Reshape_99__408
%"_val_49__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_47__408", %"_val_48__408") {allowzero=0}
2336 | # Constant_100__408
%"_val_50__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2337 | # Cast_101__408
%"_val_51__408"<INT64,?> ⬅️ ::Cast(%"_val_50__408") {to=7}
2338 | # Constant_102__408
%"_val_52__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2339 | # Reshape_103__408
%"_val_53__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_51__408", %"_val_52__408") {allowzero=0}
2340 | # Constant_104__408
%"_val_54__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2341 | # Cast_105__408
%"_val_55__408"<INT64,?> ⬅️ ::Cast(%"_val_54__408") {to=7}
2342 | # Constant_106__408
%"_val_56__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2343 | # Reshape_107__408
%"_val_57__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_55__408", %"_val_56__408") {allowzero=0}
2344 | # Slice_108__408
%"slice_59__408"<FLOAT16,[unk__601,unk__602,unk__603,unk__604]> ⬅️ ::Slice(%"transpose_16__408", %"_val_45__408", %"_val_49__408", %"_val_53__408", %"_val_57__408")
2345 | # Constant_109__408
%"_val_59__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2346 | # Cast_110__408
%"_val_60__408"<INT64,?> ⬅️ ::Cast(%"_val_59__408") {to=7}
2347 | # Constant_111__408
%"_val_61__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2348 | # Reshape_112__408
%"_val_62__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_60__408", %"_val_61__408") {allowzero=0}
2349 | # Constant_113__408
%"_val_63__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2350 | # Cast_114__408
%"_val_64__408"<INT64,?> ⬅️ ::Cast(%"_val_63__408") {to=7}
2351 | # Constant_115__408
%"_val_65__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2352 | # Reshape_116__408
%"_val_66__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_64__408", %"_val_65__408") {allowzero=0}
2353 | # Constant_117__408
%"_val_67__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2354 | # Cast_118__408
%"_val_68__408"<INT64,?> ⬅️ ::Cast(%"_val_67__408") {to=7}
2355 | # Constant_119__408
%"_val_69__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2356 | # Reshape_120__408
%"_val_70__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_68__408", %"_val_69__408") {allowzero=0}
2357 | # Constant_121__408
%"_val_71__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2358 | # Cast_122__408
%"_val_72__408"<INT64,?> ⬅️ ::Cast(%"_val_71__408") {to=7}
2359 | # Constant_123__408
%"_val_73__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2360 | # Reshape_124__408
%"_val_74__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_72__408", %"_val_73__408") {allowzero=0}
2361 | # Slice_125__408
%"slice_60__408"<FLOAT16,[unk__605,unk__606,unk__607,unk__608]> ⬅️ ::Slice(%"transpose_16__408", %"_val_62__408", %"_val_66__408", %"_val_70__408", %"_val_74__408")
2362 | # n0__434
%"neg_8__408"<FLOAT16,[unk__605,unk__606,unk__607,unk__608]> ⬅️ ::Neg(%"slice_60__408")
2363 | # SequenceConstruct_127__408
%"77__408"<Sequence(Tensor(FLOAT16)),[unk__609,unk__610,unk__611,unk__612]> ⬅️ ::SequenceConstruct(%"neg_8__408", %"slice_59__408")
2364 | # n0__435
%"cat_8__408"<FLOAT16,[unk__609,unk__610,unk__611,unk__613]> ⬅️ ::ConcatFromSequence(%"77__408") {axis=-1}
2365 | # n0__436
%"mul_43__408"<FLOAT16,?> ⬅️ ::Mul(%"cat_8__408", %"unsqueeze_18__408")
2366 | # n0__437
%"alpha__437"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
2367 | # n1__437
%"alpha_0__437"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__437", %"mul_43__408")
2368 | # n2__437
%"other_1__437"<FLOAT16,?> ⬅️ ::Mul(%"mul_43__408", %"alpha_0__437")
2369 | # n3__437
%"add_25__408"<FLOAT16,?> ⬅️ ::Add(%"mul_42__408", %"other_1__437")
2370 | # n0__438
%"mul_44__408"<FLOAT16,?> ⬅️ ::Mul(%"transpose_17__408", %"unsqueeze_17__408")
2371 | # Constant_132__408
%"_val_82__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2372 | # Cast_133__408
%"_val_83__408"<INT64,?> ⬅️ ::Cast(%"_val_82__408") {to=7}
2373 | # Constant_134__408
%"_val_84__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2374 | # Reshape_135__408
%"_val_85__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_83__408", %"_val_84__408") {allowzero=0}
2375 | # Constant_136__408
%"_val_86__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2376 | # Cast_137__408
%"_val_87__408"<INT64,?> ⬅️ ::Cast(%"_val_86__408") {to=7}
2377 | # Constant_138__408
%"_val_88__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2378 | # Reshape_139__408
%"_val_89__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_87__408", %"_val_88__408") {allowzero=0}
2379 | # Constant_140__408
%"_val_90__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2380 | # Cast_141__408
%"_val_91__408"<INT64,?> ⬅️ ::Cast(%"_val_90__408") {to=7}
2381 | # Constant_142__408
%"_val_92__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2382 | # Reshape_143__408
%"_val_93__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_91__408", %"_val_92__408") {allowzero=0}
2383 | # Constant_144__408
%"_val_94__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2384 | # Cast_145__408
%"_val_95__408"<INT64,?> ⬅️ ::Cast(%"_val_94__408") {to=7}
2385 | # Constant_146__408
%"_val_96__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2386 | # Reshape_147__408
%"_val_97__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_95__408", %"_val_96__408") {allowzero=0}
2387 | # Slice_148__408
%"slice_61__408"<FLOAT16,[unk__614,unk__615,unk__616,unk__617]> ⬅️ ::Slice(%"transpose_17__408", %"_val_85__408", %"_val_89__408", %"_val_93__408", %"_val_97__408")
2388 | # Constant_149__408
%"_val_99__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2389 | # Cast_150__408
%"_val_100__408"<INT64,?> ⬅️ ::Cast(%"_val_99__408") {to=7}
2390 | # Constant_151__408
%"_val_101__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2391 | # Reshape_152__408
%"_val_102__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_100__408", %"_val_101__408") {allowzero=0}
2392 | # Constant_153__408
%"_val_103__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2393 | # Cast_154__408
%"_val_104__408"<INT64,?> ⬅️ ::Cast(%"_val_103__408") {to=7}
2394 | # Constant_155__408
%"_val_105__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2395 | # Reshape_156__408
%"_val_106__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_104__408", %"_val_105__408") {allowzero=0}
2396 | # Constant_157__408
%"_val_107__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2397 | # Cast_158__408
%"_val_108__408"<INT64,?> ⬅️ ::Cast(%"_val_107__408") {to=7}
2398 | # Constant_159__408
%"_val_109__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2399 | # Reshape_160__408
%"_val_110__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_108__408", %"_val_109__408") {allowzero=0}
2400 | # Constant_161__408
%"_val_111__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2401 | # Cast_162__408
%"_val_112__408"<INT64,?> ⬅️ ::Cast(%"_val_111__408") {to=7}
2402 | # Constant_163__408
%"_val_113__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2403 | # Reshape_164__408
%"_val_114__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_112__408", %"_val_113__408") {allowzero=0}
2404 | # Slice_165__408
%"slice_62__408"<FLOAT16,[unk__618,unk__619,unk__620,unk__621]> ⬅️ ::Slice(%"transpose_17__408", %"_val_102__408", %"_val_106__408", %"_val_110__408", %"_val_114__408")
2405 | # n0__439
%"neg_9__408"<FLOAT16,[unk__618,unk__619,unk__620,unk__621]> ⬅️ ::Neg(%"slice_62__408")
2406 | # SequenceConstruct_167__408
%"117__408"<Sequence(Tensor(FLOAT16)),[unk__622,unk__623,unk__624,unk__625]> ⬅️ ::SequenceConstruct(%"neg_9__408", %"slice_61__408")
2407 | # n0__440
%"cat_9__408"<FLOAT16,[unk__622,unk__623,unk__624,unk__626]> ⬅️ ::ConcatFromSequence(%"117__408") {axis=-1}
2408 | # n0__441
%"mul_45__408"<FLOAT16,?> ⬅️ ::Mul(%"cat_9__408", %"unsqueeze_18__408")
2409 | # n0__442
%"alpha__442"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
2410 | # n1__442
%"alpha_0__442"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__442", %"mul_45__408")
2411 | # n2__442
%"other_1__442"<FLOAT16,?> ⬅️ ::Mul(%"mul_45__408", %"alpha_0__442")
2412 | # n3__442
%"model_1_9"<FLOAT16,[1,8,128,128]> ⬅️ ::Add(%"mul_44__408", %"other_1__442")
2413 | # Constant_171__408
%"_val_121__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2414 | # Cast_172__408
%"_val_122__408"<INT64,?> ⬅️ ::Cast(%"_val_121__408") {to=7}
2415 | # Constant_173__408
%"_val_123__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2416 | # Reshape_174__408
%"_val_124__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_122__408", %"_val_123__408") {allowzero=0}
2417 | # Constant_175__408
%"_val_125__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2418 | # Cast_176__408
%"_val_126__408"<INT64,?> ⬅️ ::Cast(%"_val_125__408") {to=7}
2419 | # Constant_177__408
%"_val_127__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2420 | # Reshape_178__408
%"_val_128__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_126__408", %"_val_127__408") {allowzero=0}
2421 | # Constant_179__408
%"_val_129__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2422 | # Cast_180__408
%"_val_130__408"<INT64,?> ⬅️ ::Cast(%"_val_129__408") {to=7}
2423 | # Constant_181__408
%"_val_131__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2424 | # Reshape_182__408
%"_val_132__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_130__408", %"_val_131__408") {allowzero=0}
2425 | # Constant_183__408
%"_val_133__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2426 | # Cast_184__408
%"_val_134__408"<INT64,?> ⬅️ ::Cast(%"_val_133__408") {to=7}
2427 | # Constant_185__408
%"_val_135__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2428 | # Reshape_186__408
%"_val_136__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_134__408", %"_val_135__408") {allowzero=0}
2429 | # Slice_187__408
%"slice_63__408"<FLOAT16,[unk__627,unk__628,unk__629,unk__630]> ⬅️ ::Slice(%"model_1_9", %"_val_124__408", %"_val_128__408", %"_val_132__408", %"_val_136__408")
2430 | # Constant_188__408
%"_val_138__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2431 | # Cast_189__408
%"_val_139__408"<INT64,?> ⬅️ ::Cast(%"_val_138__408") {to=7}
2432 | # Constant_190__408
%"_val_140__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2433 | # Reshape_191__408
%"_val_141__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_139__408", %"_val_140__408") {allowzero=0}
2434 | # Constant_192__408
%"_val_142__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2435 | # Cast_193__408
%"_val_143__408"<INT64,?> ⬅️ ::Cast(%"_val_142__408") {to=7}
2436 | # Constant_194__408
%"_val_144__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2437 | # Reshape_195__408
%"_val_145__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_143__408", %"_val_144__408") {allowzero=0}
2438 | # Constant_196__408
%"_val_146__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2439 | # Cast_197__408
%"_val_147__408"<INT64,?> ⬅️ ::Cast(%"_val_146__408") {to=7}
2440 | # Constant_198__408
%"_val_148__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2441 | # Reshape_199__408
%"_val_149__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_147__408", %"_val_148__408") {allowzero=0}
2442 | # Constant_200__408
%"_val_150__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2443 | # Cast_201__408
%"_val_151__408"<INT64,?> ⬅️ ::Cast(%"_val_150__408") {to=7}
2444 | # Constant_202__408
%"_val_152__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2445 | # Reshape_203__408
%"_val_153__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_151__408", %"_val_152__408") {allowzero=0}
2446 | # Slice_204__408
%"slice_64__408"<FLOAT16,[unk__631,unk__632,unk__633,unk__634]> ⬅️ ::Slice(%"slice_63__408", %"_val_141__408", %"_val_145__408", %"_val_149__408", %"_val_153__408")
2447 | # n0__443
%"dim__443"<INT64,?> ⬅️ ::Constant() {value_int=2}
2448 | # n1__443
%"dim_0__443"<INT64,?> ⬅️ ::Cast(%"dim__443") {to=7}
2449 | # n2__443
%"unsqueeze_19__408"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_64__408", %"dim_0__443")
2450 | # Constant_206__408
%"_val_156__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2451 | # Cast_207__408
%"_val_157__408"<INT64,?> ⬅️ ::Cast(%"_val_156__408") {to=7}
2452 | # Constant_208__408
%"_val_158__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2453 | # Reshape_209__408
%"_val_159__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_157__408", %"_val_158__408") {allowzero=0}
2454 | # Constant_210__408
%"_val_160__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2455 | # Cast_211__408
%"_val_161__408"<INT64,?> ⬅️ ::Cast(%"_val_160__408") {to=7}
2456 | # Constant_212__408
%"_val_162__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2457 | # Reshape_213__408
%"_val_163__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_161__408", %"_val_162__408") {allowzero=0}
2458 | # Constant_214__408
%"_val_164__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2459 | # Cast_215__408
%"_val_165__408"<INT64,?> ⬅️ ::Cast(%"_val_164__408") {to=7}
2460 | # Constant_216__408
%"_val_166__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2461 | # Reshape_217__408
%"_val_167__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_165__408", %"_val_166__408") {allowzero=0}
2462 | # Constant_218__408
%"_val_168__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2463 | # Cast_219__408
%"_val_169__408"<INT64,?> ⬅️ ::Cast(%"_val_168__408") {to=7}
2464 | # Constant_220__408
%"_val_170__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2465 | # Reshape_221__408
%"_val_171__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_169__408", %"_val_170__408") {allowzero=0}
2466 | # Slice_222__408
%"slice_65__408"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_19__408", %"_val_159__408", %"_val_163__408", %"_val_167__408", %"_val_171__408")
2467 | # Constant_223__408
%"_val_173__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2468 | # Cast_224__408
%"_val_174__408"<INT64,?> ⬅️ ::Cast(%"_val_173__408") {to=7}
2469 | # Constant_225__408
%"_val_175__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2470 | # Reshape_226__408
%"_val_176__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_174__408", %"_val_175__408") {allowzero=0}
2471 | # Constant_227__408
%"_val_177__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2472 | # Cast_228__408
%"_val_178__408"<INT64,?> ⬅️ ::Cast(%"_val_177__408") {to=7}
2473 | # Constant_229__408
%"_val_179__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2474 | # Reshape_230__408
%"_val_180__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_178__408", %"_val_179__408") {allowzero=0}
2475 | # Constant_231__408
%"_val_181__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2476 | # Cast_232__408
%"_val_182__408"<INT64,?> ⬅️ ::Cast(%"_val_181__408") {to=7}
2477 | # Constant_233__408
%"_val_183__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2478 | # Reshape_234__408
%"_val_184__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_182__408", %"_val_183__408") {allowzero=0}
2479 | # Constant_235__408
%"_val_185__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2480 | # Cast_236__408
%"_val_186__408"<INT64,?> ⬅️ ::Cast(%"_val_185__408") {to=7}
2481 | # Constant_237__408
%"_val_187__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2482 | # Reshape_238__408
%"_val_188__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_186__408", %"_val_187__408") {allowzero=0}
2483 | # Slice_239__408
%"slice_66__408"<FLOAT16,?> ⬅️ ::Slice(%"slice_65__408", %"_val_176__408", %"_val_180__408", %"_val_184__408", %"_val_188__408")
2484 | # Constant_240__408
%"_val_190__408"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
2485 | # n0__444
%"size_0__444"<INT64,[5]> ⬅️ ::Cast(%"_val_190__408") {to=7}
2486 | # n1__444
%"size_1__444"<INT64,[5]> ⬅️ ::Abs(%"size_0__444")
2487 | # n2__444
%"expand_8__408"<FLOAT16,?> ⬅️ ::Expand(%"slice_66__408", %"size_1__444")
2488 | # n0__445
%"clone_8__408"<FLOAT16,?> ⬅️ ::Identity(%"expand_8__408")
2489 | # Constant_243__408
%"_val_193__408"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
2490 | # n0__446
%"size_0__446"<INT64,[4]> ⬅️ ::Cast(%"_val_193__408") {to=7}
2491 | # n1__446
%"view_90__408"<FLOAT16,[unk__635,unk__636,unk__637,unk__638]> ⬅️ ::Reshape(%"clone_8__408", %"size_0__446")
2492 | # Constant_245__408
%"_val_195__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2493 | # Cast_246__408
%"_val_196__408"<INT64,?> ⬅️ ::Cast(%"_val_195__408") {to=7}
2494 | # Constant_247__408
%"_val_197__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2495 | # Reshape_248__408
%"_val_198__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_196__408", %"_val_197__408") {allowzero=0}
2496 | # Constant_249__408
%"_val_199__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2497 | # Cast_250__408
%"_val_200__408"<INT64,?> ⬅️ ::Cast(%"_val_199__408") {to=7}
2498 | # Constant_251__408
%"_val_201__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2499 | # Reshape_252__408
%"_val_202__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_200__408", %"_val_201__408") {allowzero=0}
2500 | # Constant_253__408
%"_val_203__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2501 | # Cast_254__408
%"_val_204__408"<INT64,?> ⬅️ ::Cast(%"_val_203__408") {to=7}
2502 | # Constant_255__408
%"_val_205__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2503 | # Reshape_256__408
%"_val_206__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_204__408", %"_val_205__408") {allowzero=0}
2504 | # Constant_257__408
%"_val_207__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2505 | # Cast_258__408
%"_val_208__408"<INT64,?> ⬅️ ::Cast(%"_val_207__408") {to=7}
2506 | # Constant_259__408
%"_val_209__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2507 | # Reshape_260__408
%"_val_210__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_208__408", %"_val_209__408") {allowzero=0}
2508 | # Slice_261__408
%"slice_67__408"<FLOAT16,[unk__639,unk__640,unk__641,unk__642]> ⬅️ ::Slice(%"model_1_8", %"_val_198__408", %"_val_202__408", %"_val_206__408", %"_val_210__408")
2509 | # Constant_262__408
%"_val_212__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2510 | # Cast_263__408
%"_val_213__408"<INT64,?> ⬅️ ::Cast(%"_val_212__408") {to=7}
2511 | # Constant_264__408
%"_val_214__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2512 | # Reshape_265__408
%"_val_215__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_213__408", %"_val_214__408") {allowzero=0}
2513 | # Constant_266__408
%"_val_216__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2514 | # Cast_267__408
%"_val_217__408"<INT64,?> ⬅️ ::Cast(%"_val_216__408") {to=7}
2515 | # Constant_268__408
%"_val_218__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2516 | # Reshape_269__408
%"_val_219__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_217__408", %"_val_218__408") {allowzero=0}
2517 | # Constant_270__408
%"_val_220__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2518 | # Cast_271__408
%"_val_221__408"<INT64,?> ⬅️ ::Cast(%"_val_220__408") {to=7}
2519 | # Constant_272__408
%"_val_222__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2520 | # Reshape_273__408
%"_val_223__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_221__408", %"_val_222__408") {allowzero=0}
2521 | # Constant_274__408
%"_val_224__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2522 | # Cast_275__408
%"_val_225__408"<INT64,?> ⬅️ ::Cast(%"_val_224__408") {to=7}
2523 | # Constant_276__408
%"_val_226__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2524 | # Reshape_277__408
%"_val_227__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_225__408", %"_val_226__408") {allowzero=0}
2525 | # Slice_278__408
%"slice_68__408"<FLOAT16,[unk__643,unk__644,unk__645,unk__646]> ⬅️ ::Slice(%"slice_67__408", %"_val_215__408", %"_val_219__408", %"_val_223__408", %"_val_227__408")
2526 | # n0__447
%"dim__447"<INT64,?> ⬅️ ::Constant() {value_int=2}
2527 | # n1__447
%"dim_0__447"<INT64,?> ⬅️ ::Cast(%"dim__447") {to=7}
2528 | # n2__447
%"unsqueeze_20__408"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_68__408", %"dim_0__447")
2529 | # Constant_280__408
%"_val_230__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2530 | # Cast_281__408
%"_val_231__408"<INT64,?> ⬅️ ::Cast(%"_val_230__408") {to=7}
2531 | # Constant_282__408
%"_val_232__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2532 | # Reshape_283__408
%"_val_233__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_231__408", %"_val_232__408") {allowzero=0}
2533 | # Constant_284__408
%"_val_234__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2534 | # Cast_285__408
%"_val_235__408"<INT64,?> ⬅️ ::Cast(%"_val_234__408") {to=7}
2535 | # Constant_286__408
%"_val_236__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2536 | # Reshape_287__408
%"_val_237__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_235__408", %"_val_236__408") {allowzero=0}
2537 | # Constant_288__408
%"_val_238__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2538 | # Cast_289__408
%"_val_239__408"<INT64,?> ⬅️ ::Cast(%"_val_238__408") {to=7}
2539 | # Constant_290__408
%"_val_240__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2540 | # Reshape_291__408
%"_val_241__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_239__408", %"_val_240__408") {allowzero=0}
2541 | # Constant_292__408
%"_val_242__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2542 | # Cast_293__408
%"_val_243__408"<INT64,?> ⬅️ ::Cast(%"_val_242__408") {to=7}
2543 | # Constant_294__408
%"_val_244__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2544 | # Reshape_295__408
%"_val_245__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_243__408", %"_val_244__408") {allowzero=0}
2545 | # Slice_296__408
%"slice_69__408"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_20__408", %"_val_233__408", %"_val_237__408", %"_val_241__408", %"_val_245__408")
2546 | # Constant_297__408
%"_val_247__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2547 | # Cast_298__408
%"_val_248__408"<INT64,?> ⬅️ ::Cast(%"_val_247__408") {to=7}
2548 | # Constant_299__408
%"_val_249__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2549 | # Reshape_300__408
%"_val_250__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_248__408", %"_val_249__408") {allowzero=0}
2550 | # Constant_301__408
%"_val_251__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2551 | # Cast_302__408
%"_val_252__408"<INT64,?> ⬅️ ::Cast(%"_val_251__408") {to=7}
2552 | # Constant_303__408
%"_val_253__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2553 | # Reshape_304__408
%"_val_254__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_252__408", %"_val_253__408") {allowzero=0}
2554 | # Constant_305__408
%"_val_255__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2555 | # Cast_306__408
%"_val_256__408"<INT64,?> ⬅️ ::Cast(%"_val_255__408") {to=7}
2556 | # Constant_307__408
%"_val_257__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2557 | # Reshape_308__408
%"_val_258__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_256__408", %"_val_257__408") {allowzero=0}
2558 | # Constant_309__408
%"_val_259__408"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2559 | # Cast_310__408
%"_val_260__408"<INT64,?> ⬅️ ::Cast(%"_val_259__408") {to=7}
2560 | # Constant_311__408
%"_val_261__408"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2561 | # Reshape_312__408
%"_val_262__408"<INT64,[1]> ⬅️ ::Reshape(%"_val_260__408", %"_val_261__408") {allowzero=0}
2562 | # Slice_313__408
%"slice_70__408"<FLOAT16,?> ⬅️ ::Slice(%"slice_69__408", %"_val_250__408", %"_val_254__408", %"_val_258__408", %"_val_262__408")
2563 | # Constant_314__408
%"_val_264__408"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
2564 | # n0__448
%"size_0__448"<INT64,[5]> ⬅️ ::Cast(%"_val_264__408") {to=7}
2565 | # n1__448
%"size_1__448"<INT64,[5]> ⬅️ ::Abs(%"size_0__448")
2566 | # n2__448
%"expand_9__408"<FLOAT16,?> ⬅️ ::Expand(%"slice_70__408", %"size_1__448")
2567 | # n0__449
%"clone_9__408"<FLOAT16,?> ⬅️ ::Identity(%"expand_9__408")
2568 | # Constant_317__408
%"_val_267__408"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
2569 | # n0__450
%"size_0__450"<INT64,[4]> ⬅️ ::Cast(%"_val_267__408") {to=7}
2570 | # n1__450
%"view_91__408"<FLOAT16,[unk__647,unk__648,unk__649,unk__650]> ⬅️ ::Reshape(%"clone_9__408", %"size_0__450")
2571 | # n0__451
%"tmp__451"<INT64,[unk__651]> ⬅️ ::Shape(%"add_25__408")
2572 | # n1__451
%"int64_m1__451"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_m1')}
2573 | # n2__451
%"tmp_subscripted__451"<INT64,?> ⬅️ ::Gather(%"tmp__451", %"int64_m1__451") {axis=0}
2574 | # n3__451
%"embedding_size__451"<FLOAT16,?> ⬅️ ::CastLike(%"tmp_subscripted__451", %"add_25__408")
2575 | # n4__451
%"const__451"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2576 | # n5__451
%"tmp_0__451"<FLOAT16,?> ⬅️ ::Sqrt(%"embedding_size__451")
2577 | # n6__451
%"const_cast__451"<FLOAT16,?> ⬅️ ::CastLike(%"const__451", %"tmp_0__451")
2578 | # n7__451
%"_val_269__408"<FLOAT16,?> ⬅️ ::Div(%"const_cast__451", %"tmp_0__451")
2579 | # CastLike_320__408
%"_val_270__408"<FLOAT16,?> ⬅️ ::CastLike(%"_val_269__408", %"add_25__408")
2580 | # n0__452
%"tmp__452"<INT64,[unk__652]> ⬅️ ::Shape(%"add_25__408")
2581 | # n1__452
%"int64_0_1d__452"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
2582 | # n2__452
%"int64_1_1d__452"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
2583 | # n3__452
%"int64_m2_1d__452"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
2584 | # n4__452
%"int64_m1_1d__452"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
2585 | # n5__452
%"target_length__452"<INT64,[unk__653]> ⬅️ ::Slice(%"tmp__452", %"int64_m2_1d__452", %"int64_m1_1d__452", %"int64_0_1d__452", %"int64_1_1d__452")
2586 | # n6__452
%"tmp_0__452"<INT64,[4]> ⬅️ ::Shape(%"view_90__408")
2587 | # n7__452
%"int64_0_1d_1__452"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
2588 | # n8__452
%"int64_1_1d_2__452"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
2589 | # n9__452
%"int64_m2_1d_3__452"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_3')}
2590 | # n10__452
%"int64_m1_1d_4__452"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_4')}
2591 | # n11__452
%"source_length__452"<INT64,[1]> ⬅️ ::Slice(%"tmp_0__452", %"int64_m2_1d_3__452", %"int64_m1_1d_4__452", %"int64_0_1d_1__452", %"int64_1_1d_2__452")
2592 | # n12__452
%"size__452"<INT64,[unk__654]> ⬅️ ::Concat(%"target_length__452", %"source_length__452") {axis=0}
2593 | # n13__452
%"const__452"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2594 | # n14__452
%"attn_mask__452"<FLOAT,?> ⬅️ ::Expand(%"const__452", %"size__452")
2595 | # n15__452
%"attn_mask_5__452"<FLOAT,?> ⬅️ ::Trilu(%"attn_mask__452") {upper=0}
2596 | # n16__452
%"const_6__452"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_6')}
2597 | # n17__452
%"const_6_cast__452"<FLOAT,?> ⬅️ ::CastLike(%"const_6__452", %"attn_mask_5__452")
2598 | # n18__452
%"tmp_7__452"<BOOL,?> ⬅️ ::Equal(%"attn_mask_5__452", %"const_6_cast__452")
2599 | # n19__452
%"tmp_8__452"<FLOAT,?> ⬅️ ::Constant() {value_float=-inf}
2600 | # n20__452
%"const_9__452"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
2601 | # n21__452
%"const_9_cast__452"<FLOAT,?> ⬅️ ::CastLike(%"const_9__452", %"tmp_8__452")
2602 | # n22__452
%"attn_mask_10__452"<FLOAT,?> ⬅️ ::Where(%"tmp_7__452", %"tmp_8__452", %"const_9_cast__452")
2603 | # n23__452
%"_val_271__408"<FLOAT16,?> ⬅️ ::CastLike(%"attn_mask_10__452", %"add_25__408")
2604 | # n0__453
%"key_shape__453"<INT64,[4]> ⬅️ ::Shape(%"view_90__408")
2605 | # n1__453
%"int64_0_1d__453"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
2606 | # n2__453
%"int64_1_1d__453"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
2607 | # n3__453
%"int64_m1_1d__453"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
2608 | # n4__453
%"int64_9223372036854775807_1d__453"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_9223372036854775807_1d')}
2609 | # n5__453
%"key_last_dim__453"<INT64,[1]> ⬅️ ::Slice(%"key_shape__453", %"int64_m1_1d__453", %"int64_9223372036854775807_1d__453", %"int64_0_1d__453", %"int64_1_1d__453")
2610 | # n6__453
%"int64_0_1d_0__453"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_0')}
2611 | # n7__453
%"int64_1_1d_1__453"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_1')}
2612 | # n8__453
%"int64_m2_1d__453"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
2613 | # n9__453
%"int64_m1_1d_2__453"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_2')}
2614 | # n10__453
%"key_second_last_dim__453"<INT64,[1]> ⬅️ ::Slice(%"key_shape__453", %"int64_m2_1d__453", %"int64_m1_1d_2__453", %"int64_0_1d_0__453", %"int64_1_1d_1__453")
2615 | # n11__453
%"int64_0_1d_3__453"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
2616 | # n12__453
%"int64_1_1d_4__453"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
2617 | # n13__453
%"int64_m2_1d_5__453"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_5')}
2618 | # n14__453
%"key_first_dims__453"<INT64,[2]> ⬅️ ::Slice(%"key_shape__453", %"int64_0_1d_3__453", %"int64_m2_1d_5__453", %"int64_0_1d_3__453", %"int64_1_1d_4__453")
2619 | # n15__453
%"tmp__453"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2620 | # n16__453
%"key_squeezed_shape__453"<INT64,[3]> ⬅️ ::Concat(%"tmp__453", %"key_second_last_dim__453", %"key_last_dim__453") {axis=0}
2621 | # n17__453
%"key_squeezed__453"<FLOAT16,[unk__655,unk__656,unk__657]> ⬅️ ::Reshape(%"view_90__408", %"key_squeezed_shape__453")
2622 | # n18__453
%"key_squeezed_transposed__453"<FLOAT16,[unk__655,unk__657,unk__656]> ⬅️ ::Transpose(%"key_squeezed__453") {perm=[0, 2, 1]}
2623 | # n19__453
%"key_transposed_shape__453"<INT64,[4]> ⬅️ ::Concat(%"key_first_dims__453", %"key_last_dim__453", %"key_second_last_dim__453") {axis=0}
2624 | # n20__453
%"key_transposed__453"<FLOAT16,[unk__658,unk__659,unk__660,unk__661]> ⬅️ ::Reshape(%"key_squeezed_transposed__453", %"key_transposed_shape__453")
2625 | # n21__453
%"tmp_6__453"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__408")
2626 | # n22__453
%"query_scaled__453"<FLOAT16,?> ⬅️ ::Mul(%"add_25__408", %"tmp_6__453")
2627 | # n23__453
%"tmp_7__453"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__408")
2628 | # n24__453
%"key_transposed_scaled__453"<FLOAT16,[unk__658,unk__659,unk__660,unk__661]> ⬅️ ::Mul(%"key_transposed__453", %"tmp_7__453")
2629 | # n25__453
%"tmp_8__453"<FLOAT16,?> ⬅️ ::MatMul(%"query_scaled__453", %"key_transposed_scaled__453")
2630 | # n26__453
%"tmp_9__453"<FLOAT16,?> ⬅️ ::Add(%"tmp_8__453", %"_val_271__408")
2631 | # n27__453
%"attn_weight__453"<FLOAT16,?> ⬅️ ::Softmax(%"tmp_9__453") {axis=-1}
2632 | # n28__453
%"dropout_p__453"<FLOAT,?> ⬅️ ::Constant() {value_float=0.0}
2633 | # n29__453
%"attn_weight_10__453"<FLOAT16,?>, %"___453"<BOOL,?> ⬅️ ::Dropout(%"attn_weight__453", %"dropout_p__453")
2634 | # n30__453
%"_scaled_dot_product_efficient_attention_4__408"<FLOAT16,?> ⬅️ ::MatMul(%"attn_weight_10__453", %"view_91__408")
2635 | # n0__454
%"query_0__454"<FLOAT16,?> ⬅️ ::Transpose(%"add_25__408") {perm=[0, 2, 1, 3]}
2636 | # n1__454
%"query_shape__454"<INT64,[unk__662]> ⬅️ ::Shape(%"query_0__454")
2637 | # n2__454
%"int64_0_1d__454"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
2638 | # n3__454
%"int64_1_1d__454"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
2639 | # n4__454
%"query_first_dims__454"<INT64,[unk__663]> ⬅️ ::Slice(%"query_shape__454", %"int64_0_1d__454", %"int64_1_1d__454", %"int64_0_1d__454", %"int64_1_1d__454")
2640 | # n5__454
%"int64_0_1d_1__454"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
2641 | # n6__454
%"int64_1_1d_2__454"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
2642 | # n7__454
%"int64_2_1d__454"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_2_1d')}
2643 | # n8__454
%"query_second_dims__454"<INT64,[unk__664]> ⬅️ ::Slice(%"query_shape__454", %"int64_1_1d_2__454", %"int64_2_1d__454", %"int64_0_1d_1__454", %"int64_1_1d_2__454")
2644 | # n9__454
%"int64_0_1d_3__454"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
2645 | # n10__454
%"int64_1_1d_4__454"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
2646 | # n11__454
%"int64_m2_1d__454"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
2647 | # n12__454
%"int64_m1_1d__454"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
2648 | # n13__454
%"num_heads__454"<INT64,[unk__665]> ⬅️ ::Slice(%"query_shape__454", %"int64_m2_1d__454", %"int64_m1_1d__454", %"int64_0_1d_3__454", %"int64_1_1d_4__454")
2649 | # n14__454
%"compute_log_sumexp__454"<INT64,?> ⬅️ ::Constant() {value_int=0}
2650 | # n15__454
%"compute_log_sumexp_as_bool__454"<BOOL,?> ⬅️ ::Cast(%"compute_log_sumexp__454") {to=9}
2651 | # n16__454
%"_scaled_dot_product_efficient_attention_4_1__408"<FLOAT,?> ⬅️ ::If(%"compute_log_sumexp_as_bool__454") {then_branch=
graph(
name=thenGraph_14,
inputs=(
),
outputs=(
%"logsum_exp__454"<FLOAT,?>
),
) {
0 | # n0__454_184
%"tmp__454"<FLOAT,[unk__664]> ⬅️ ::Cast(%"query_second_dims__454") {to=1}
1 | # n1__454_185
%"const__454"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2 | # n2__454_186
%"const_cast__454"<FLOAT,?> ⬅️ ::CastLike(%"const__454", %"tmp__454")
3 | # n3__454_187
%"tmp_5__454"<FLOAT,[unk__664]> ⬅️ ::Div(%"tmp__454", %"const_cast__454")
4 | # n4__454_188
%"tmp_6__454"<FLOAT,[unk__664]> ⬅️ ::Ceil(%"tmp_5__454")
5 | # n5__454_189
%"const_7__454"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_7')}
6 | # n6__454_190
%"const_7_cast__454"<FLOAT,?> ⬅️ ::CastLike(%"const_7__454", %"tmp_6__454")
7 | # n7__454_191
%"tmp_8__454"<FLOAT,[unk__664]> ⬅️ ::Mul(%"tmp_6__454", %"const_7_cast__454")
8 | # n8__454_192
%"logsumexp_dim__454"<INT64,[unk__664]> ⬅️ ::Cast(%"tmp_8__454") {to=7}
9 | # n9__454_193
%"const_9__454"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
10 | # n10__454_194
%"tmp_10__454"<INT64,[unk__666]> ⬅️ ::Concat(%"query_first_dims__454", %"num_heads__454", %"logsumexp_dim__454") {axis=0}
11 | # n11__454_195
%"logsum_exp__454"<FLOAT,?> ⬅️ ::Expand(%"const_9__454", %"tmp_10__454")
return %"logsum_exp__454"<FLOAT,?>
}, else_branch=
graph(
name=elseGraph_14,
inputs=(
),
outputs=(
%"logsum_exp_14__454"<FLOAT,?>
),
) {
0 | # n0__454_196
%"const_11__454"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_11')}
1 | # n1__454_197
%"int64_0_1d_12__454"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_12')}
2 | # n2__454_198
%"int64_0_1d_12_cast__454"<INT64,[1]> ⬅️ ::CastLike(%"int64_0_1d_12__454", %"num_heads__454")
3 | # n3__454_199
%"tmp_13__454"<INT64,[unk__667]> ⬅️ ::Concat(%"query_first_dims__454", %"num_heads__454", %"int64_0_1d_12_cast__454") {axis=0}
4 | # n4__454_200
%"logsum_exp_14__454"<FLOAT,?> ⬅️ ::Expand(%"const_11__454", %"tmp_13__454")
return %"logsum_exp_14__454"<FLOAT,?>
}}
2652 | # n17__454
%"tmp_16__454"<INT64,[0]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[0]>('Empty_INTS')}
2653 | # n18__454
%"tmp_17__454"<FLOAT,?> ⬅️ ::ConstantOfShape(%"tmp_16__454")
2654 | # n19__454
%"_scaled_dot_product_efficient_attention_4_3__408"<INT64,?> ⬅️ ::Cast(%"tmp_17__454") {to=7}
2655 | # Transpose_324__408
%"transpose_19__408"<FLOAT16,?> ⬅️ ::Transpose(%"_scaled_dot_product_efficient_attention_4__408") {perm=[0, 2, 1, 3]}
2656 | # Constant_325__408
%"_val_276__408"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2657 | # n0__455
%"size_0__455"<INT64,[3]> ⬅️ ::Cast(%"_val_276__408") {to=7}
2658 | # n1__455
%"view_92__408"<FLOAT16,[unk__668,unk__669,unk__670]> ⬅️ ::Reshape(%"transpose_19__408", %"size_0__455")
2659 | # n0__458
%"tmp__458"<INT64,[2]> ⬅️ ::Shape(%"model.layers.4.self_attn.o_proj.weight")
2660 | # n1__458
%"rank__457"<INT64,?> ⬅️ ::Size(%"tmp__458")
2661 | # n1__457
%"int64_2__457"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2662 | # n2__457
%"int64_2_cast__457"<INT64,?> ⬅️ ::CastLike(%"int64_2__457", %"rank__457")
2663 | # n3__457
%"cond__457"<BOOL,?> ⬅️ ::Equal(%"rank__457", %"int64_2_cast__457")
2664 | # n4__457
%"t_31__456"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__457") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__457"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__457_201
%"result__457"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.4.self_attn.o_proj.weight") {perm=[1, 0]}
return %"result__457"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__457"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__457_202
%"result_0__457"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.4.self_attn.o_proj.weight")
return %"result_0__457"<FLOAT16,[4096,4096]>
}}
2665 | # Constant_3__456
%"_val_3__456"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2666 | # n0__459
%"size_0__459"<INT64,[2]> ⬅️ ::Cast(%"_val_3__456") {to=7}
2667 | # n1__459
%"view_93__456"<FLOAT16,[unk__671,unk__672]> ⬅️ ::Reshape(%"view_92__408", %"size_0__459")
2668 | # n0__460
%"mm_31__456"<FLOAT16,[unk__671,4096]> ⬅️ ::MatMul(%"view_93__456", %"t_31__456")
2669 | # Constant_6__456
%"_val_6__456"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2670 | # n0__461
%"size_0__461"<INT64,[3]> ⬅️ ::Cast(%"_val_6__456") {to=7}
2671 | # n1__461
%"model_layers_4_self_attn_1_2__398"<FLOAT16,[unk__673,unk__674,unk__675]> ⬅️ ::Reshape(%"mm_31__456", %"size_0__461")
2672 | # n0__462
%"alpha__462"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
2673 | # n1__462
%"alpha_0__462"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__462", %"model_layers_4_self_attn_1_2__398")
2674 | # n2__462
%"other_1__462"<FLOAT16,[unk__673,unk__674,unk__675]> ⬅️ ::Mul(%"model_layers_4_self_attn_1_2__398", %"alpha_0__462")
2675 | # n3__462
%"add_27__398"<FLOAT16,[unk__676,128,4096]> ⬅️ ::Add(%"model_layers_3_1_2__1", %"other_1__462")
2676 | # Cast_3__463
%"_to_copy_26__463"<FLOAT,[unk__676,128,4096]> ⬅️ ::Cast(%"add_27__398") {to=1}
2677 | # Constant_4__463
%"_val_2__463"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2678 | # Cast_5__463
%"scalar_tensor_default_9__463"<FLOAT,?> ⬅️ ::Cast(%"_val_2__463") {to=1}
2679 | # n0__464
%"pow_10__463"<FLOAT,[unk__676,128,4096]> ⬅️ ::Pow(%"_to_copy_26__463", %"scalar_tensor_default_9__463")
2680 | # Constant_7__463
%"_val_5__463"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
2681 | # n0__466
%"tmp__466"<INT64,[3]> ⬅️ ::Shape(%"pow_10__463")
2682 | # n1__466
%"tmp_0__466"<INT64,?> ⬅️ ::Size(%"tmp__466")
2683 | # n2__466
%"tmp_1__466"<INT64,?> ⬅️ ::Constant() {value_int=0}
2684 | # n3__466
%"cond__465"<BOOL,?> ⬅️ ::Equal(%"tmp_0__466", %"tmp_1__466")
2685 | # n1__465
%"mean_9__463"<FLOAT,?> ⬅️ ::If(%"cond__465") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__465"<FLOAT,[unk__676,128,4096]>
),
) {
0 | # n0__465_203
%"result__465"<FLOAT,[unk__676,128,4096]> ⬅️ ::Identity(%"pow_10__463")
return %"result__465"<FLOAT,[unk__676,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__465"<FLOAT,?>
),
) {
0 | # n0__467
%"tmp__467"<INT64,[1]> ⬅️ ::Shape(%"_val_5__463")
1 | # n1__467
%"tmp_0__467"<INT64,?> ⬅️ ::Size(%"tmp__467")
2 | # n2__467
%"tmp_1__467"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__467
%"cond_0__465"<BOOL,?> ⬅️ ::Equal(%"tmp_0__467", %"tmp_1__467")
4 | # n1__465_205
%"dim_3__465"<INT64,?> ⬅️ ::If(%"cond_0__465") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__465"<INT64,[1,1]>
),
) {
0 | # n0__465_206
%"int64_0__465"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__465_207
%"dim_1__465"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__463", %"int64_0__465")
return %"dim_1__465"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__465"<INT64,[1]>
),
) {
0 | # n0__465_208
%"dim_2__465"<INT64,[1]> ⬅️ ::Identity(%"_val_5__463")
return %"dim_2__465"<INT64,[1]>
}}
5 | # n2__465
%"result_4__465"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_10__463", %"dim_3__465") {keepdims=1}
return %"result_4__465"<FLOAT,?>
}}
2686 | # Constant_9__463
%"_val_7__463"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
2687 | # n0__468
%"alpha__468"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
2688 | # n1__468
%"alpha_0__468"<FLOAT,?> ⬅️ ::CastLike(%"alpha__468", %"_val_7__463")
2689 | # n2__468
%"other_1__468"<FLOAT,?> ⬅️ ::Mul(%"_val_7__463", %"alpha_0__468")
2690 | # n3__468
%"add_28__463"<FLOAT,?> ⬅️ ::Add(%"mean_9__463", %"other_1__468")
2691 | # n0__469
%"tmp__469"<FLOAT,?> ⬅️ ::Sqrt(%"add_28__463")
2692 | # n1__469
%"rsqrt_9__463"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__469")
2693 | # n0__470
%"mul_46__463"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_26__463", %"rsqrt_9__463")
2694 | # Cast_13__463
%"_to_copy_27__463"<FLOAT16,?> ⬅️ ::Cast(%"mul_46__463") {to=10}
2695 | # n0__471
%"model_layers_4_post_attention_layernorm_1__398"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.4.post_attention_layernorm.weight", %"_to_copy_27__463")
2696 | # n0__475
%"tmp__475"<INT64,[2]> ⬅️ ::Shape(%"model.layers.4.mlp.gate_proj.weight")
2697 | # n1__475
%"rank__474"<INT64,?> ⬅️ ::Size(%"tmp__475")
2698 | # n1__474
%"int64_2__474"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2699 | # n2__474
%"int64_2_cast__474"<INT64,?> ⬅️ ::CastLike(%"int64_2__474", %"rank__474")
2700 | # n3__474
%"cond__474"<BOOL,?> ⬅️ ::Equal(%"rank__474", %"int64_2_cast__474")
2701 | # n4__474
%"t_32__473"<FLOAT16,[unk__677,unk__678]> ⬅️ ::If(%"cond__474") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__474"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__474_209
%"result__474"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.4.mlp.gate_proj.weight") {perm=[1, 0]}
return %"result__474"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__474"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__474_210
%"result_0__474"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.4.mlp.gate_proj.weight")
return %"result_0__474"<FLOAT16,[14336,4096]>
}}
2702 | # Constant_3__473
%"_val_3__473"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2703 | # n0__476
%"size_0__476"<INT64,[2]> ⬅️ ::Cast(%"_val_3__473") {to=7}
2704 | # n1__476
%"view_95__473"<FLOAT16,[unk__679,unk__680]> ⬅️ ::Reshape(%"model_layers_4_post_attention_layernorm_1__398", %"size_0__476")
2705 | # n0__477
%"mm_32__473"<FLOAT16,[unk__679,unk__678]> ⬅️ ::MatMul(%"view_95__473", %"t_32__473")
2706 | # Constant_6__473
%"_val_6__473"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2707 | # n0__478
%"size_0__478"<INT64,[3]> ⬅️ ::Cast(%"_val_6__473") {to=7}
2708 | # n1__478
%"model_layers_4_mlp_gate_proj_1__472"<FLOAT16,[unk__681,unk__682,unk__683]> ⬅️ ::Reshape(%"mm_32__473", %"size_0__478")
2709 | # Cast_0__479
%"_to_copy_28__479"<FLOAT,[unk__681,unk__682,unk__683]> ⬅️ ::Cast(%"model_layers_4_mlp_gate_proj_1__472") {to=1}
2710 | # n0__480
%"sigmoid_4__479"<FLOAT,[unk__681,unk__682,unk__683]> ⬅️ ::Sigmoid(%"_to_copy_28__479")
2711 | # n0__481
%"mul_48__479"<FLOAT,[unk__681,unk__682,unk__683]> ⬅️ ::Mul(%"_to_copy_28__479", %"sigmoid_4__479")
2712 | # Cast_3__479
%"model_layers_4_mlp_act_fn_1__472"<FLOAT16,[unk__681,unk__682,unk__683]> ⬅️ ::Cast(%"mul_48__479") {to=10}
2713 | # n0__484
%"tmp__484"<INT64,[2]> ⬅️ ::Shape(%"model.layers.4.mlp.up_proj.weight")
2714 | # n1__484
%"rank__483"<INT64,?> ⬅️ ::Size(%"tmp__484")
2715 | # n1__483
%"int64_2__483"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2716 | # n2__483
%"int64_2_cast__483"<INT64,?> ⬅️ ::CastLike(%"int64_2__483", %"rank__483")
2717 | # n3__483
%"cond__483"<BOOL,?> ⬅️ ::Equal(%"rank__483", %"int64_2_cast__483")
2718 | # n4__483
%"t_33__482"<FLOAT16,[unk__684,unk__685]> ⬅️ ::If(%"cond__483") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__483"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__483_211
%"result__483"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.4.mlp.up_proj.weight") {perm=[1, 0]}
return %"result__483"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__483"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__483_212
%"result_0__483"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.4.mlp.up_proj.weight")
return %"result_0__483"<FLOAT16,[14336,4096]>
}}
2719 | # Constant_3__482
%"_val_3__482"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2720 | # n0__485
%"size_0__485"<INT64,[2]> ⬅️ ::Cast(%"_val_3__482") {to=7}
2721 | # n1__485
%"view_97__482"<FLOAT16,[unk__686,unk__687]> ⬅️ ::Reshape(%"model_layers_4_post_attention_layernorm_1__398", %"size_0__485")
2722 | # n0__486
%"mm_33__482"<FLOAT16,[unk__686,unk__685]> ⬅️ ::MatMul(%"view_97__482", %"t_33__482")
2723 | # Constant_6__482
%"_val_6__482"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2724 | # n0__487
%"size_0__487"<INT64,[3]> ⬅️ ::Cast(%"_val_6__482") {to=7}
2725 | # n1__487
%"model_layers_4_mlp_up_proj_1__472"<FLOAT16,[unk__688,unk__689,unk__690]> ⬅️ ::Reshape(%"mm_33__482", %"size_0__487")
2726 | # n0__488
%"mul_49__472"<FLOAT16,[unk__691,unk__692,unk__693]> ⬅️ ::Mul(%"model_layers_4_mlp_act_fn_1__472", %"model_layers_4_mlp_up_proj_1__472")
2727 | # n0__491
%"tmp__491"<INT64,[2]> ⬅️ ::Shape(%"model.layers.4.mlp.down_proj.weight")
2728 | # n1__491
%"rank__490"<INT64,?> ⬅️ ::Size(%"tmp__491")
2729 | # n1__490
%"int64_2__490"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2730 | # n2__490
%"int64_2_cast__490"<INT64,?> ⬅️ ::CastLike(%"int64_2__490", %"rank__490")
2731 | # n3__490
%"cond__490"<BOOL,?> ⬅️ ::Equal(%"rank__490", %"int64_2_cast__490")
2732 | # n4__490
%"t_34__489"<FLOAT16,[unk__694,unk__695]> ⬅️ ::If(%"cond__490") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__490"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__490_213
%"result__490"<FLOAT16,[14336,4096]> ⬅️ ::Transpose(%"model.layers.4.mlp.down_proj.weight") {perm=[1, 0]}
return %"result__490"<FLOAT16,[14336,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__490"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__490_214
%"result_0__490"<FLOAT16,[4096,14336]> ⬅️ ::Identity(%"model.layers.4.mlp.down_proj.weight")
return %"result_0__490"<FLOAT16,[4096,14336]>
}}
2733 | # Constant_3__489
%"_val_3__489"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2734 | # n0__492
%"size_0__492"<INT64,[2]> ⬅️ ::Cast(%"_val_3__489") {to=7}
2735 | # n1__492
%"view_99__489"<FLOAT16,[unk__696,unk__697]> ⬅️ ::Reshape(%"mul_49__472", %"size_0__492")
2736 | # n0__493
%"mm_34__489"<FLOAT16,[unk__696,unk__695]> ⬅️ ::MatMul(%"view_99__489", %"t_34__489")
2737 | # Constant_6__489
%"_val_6__489"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2738 | # n0__494
%"size_0__494"<INT64,[3]> ⬅️ ::Cast(%"_val_6__489") {to=7}
2739 | # n1__494
%"model_layers_4_mlp_1__398"<FLOAT16,[unk__698,unk__699,unk__700]> ⬅️ ::Reshape(%"mm_34__489", %"size_0__494")
2740 | # n0__495
%"alpha__495"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
2741 | # n1__495
%"alpha_0__495"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__495", %"model_layers_4_mlp_1__398")
2742 | # n2__495
%"other_1__495"<FLOAT16,[unk__698,unk__699,unk__700]> ⬅️ ::Mul(%"model_layers_4_mlp_1__398", %"alpha_0__495")
2743 | # n3__495
%"model_layers_4_1_2__1"<FLOAT16,[unk__701,128,4096]> ⬅️ ::Add(%"add_27__398", %"other_1__495")
2744 | # Cast_3__497
%"_to_copy_30__497"<FLOAT,[unk__701,128,4096]> ⬅️ ::Cast(%"model_layers_4_1_2__1") {to=1}
2745 | # Constant_4__497
%"_val_2__497"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2746 | # Cast_5__497
%"scalar_tensor_default_10__497"<FLOAT,?> ⬅️ ::Cast(%"_val_2__497") {to=1}
2747 | # n0__498
%"pow_11__497"<FLOAT,[unk__701,128,4096]> ⬅️ ::Pow(%"_to_copy_30__497", %"scalar_tensor_default_10__497")
2748 | # Constant_7__497
%"_val_5__497"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
2749 | # n0__500
%"tmp__500"<INT64,[3]> ⬅️ ::Shape(%"pow_11__497")
2750 | # n1__500
%"tmp_0__500"<INT64,?> ⬅️ ::Size(%"tmp__500")
2751 | # n2__500
%"tmp_1__500"<INT64,?> ⬅️ ::Constant() {value_int=0}
2752 | # n3__500
%"cond__499"<BOOL,?> ⬅️ ::Equal(%"tmp_0__500", %"tmp_1__500")
2753 | # n1__499
%"mean_10__497"<FLOAT,?> ⬅️ ::If(%"cond__499") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__499"<FLOAT,[unk__701,128,4096]>
),
) {
0 | # n0__499_215
%"result__499"<FLOAT,[unk__701,128,4096]> ⬅️ ::Identity(%"pow_11__497")
return %"result__499"<FLOAT,[unk__701,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__499"<FLOAT,?>
),
) {
0 | # n0__501
%"tmp__501"<INT64,[1]> ⬅️ ::Shape(%"_val_5__497")
1 | # n1__501
%"tmp_0__501"<INT64,?> ⬅️ ::Size(%"tmp__501")
2 | # n2__501
%"tmp_1__501"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__501
%"cond_0__499"<BOOL,?> ⬅️ ::Equal(%"tmp_0__501", %"tmp_1__501")
4 | # n1__499_217
%"dim_3__499"<INT64,?> ⬅️ ::If(%"cond_0__499") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__499"<INT64,[1,1]>
),
) {
0 | # n0__499_218
%"int64_0__499"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__499_219
%"dim_1__499"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__497", %"int64_0__499")
return %"dim_1__499"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__499"<INT64,[1]>
),
) {
0 | # n0__499_220
%"dim_2__499"<INT64,[1]> ⬅️ ::Identity(%"_val_5__497")
return %"dim_2__499"<INT64,[1]>
}}
5 | # n2__499
%"result_4__499"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_11__497", %"dim_3__499") {keepdims=1}
return %"result_4__499"<FLOAT,?>
}}
2754 | # Constant_9__497
%"_val_7__497"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
2755 | # n0__502
%"alpha__502"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
2756 | # n1__502
%"alpha_0__502"<FLOAT,?> ⬅️ ::CastLike(%"alpha__502", %"_val_7__497")
2757 | # n2__502
%"other_1__502"<FLOAT,?> ⬅️ ::Mul(%"_val_7__497", %"alpha_0__502")
2758 | # n3__502
%"add_30__497"<FLOAT,?> ⬅️ ::Add(%"mean_10__497", %"other_1__502")
2759 | # n0__503
%"tmp__503"<FLOAT,?> ⬅️ ::Sqrt(%"add_30__497")
2760 | # n1__503
%"rsqrt_10__497"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__503")
2761 | # n0__504
%"mul_50__497"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_30__497", %"rsqrt_10__497")
2762 | # Cast_13__497
%"_to_copy_31__497"<FLOAT16,?> ⬅️ ::Cast(%"mul_50__497") {to=10}
2763 | # n0__505
%"model_layers_5_input_layernorm_1__496"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.5.input_layernorm.weight", %"_to_copy_31__497")
2764 | # n0__509
%"tmp__509"<INT64,[2]> ⬅️ ::Shape(%"model.layers.5.self_attn.q_proj.weight")
2765 | # n1__509
%"rank__508"<INT64,?> ⬅️ ::Size(%"tmp__509")
2766 | # n1__508
%"int64_2__508"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2767 | # n2__508
%"int64_2_cast__508"<INT64,?> ⬅️ ::CastLike(%"int64_2__508", %"rank__508")
2768 | # n3__508
%"cond__508"<BOOL,?> ⬅️ ::Equal(%"rank__508", %"int64_2_cast__508")
2769 | # n4__508
%"t_35__507"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__508") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__508"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__508_221
%"result__508"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.5.self_attn.q_proj.weight") {perm=[1, 0]}
return %"result__508"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__508"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__508_222
%"result_0__508"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.5.self_attn.q_proj.weight")
return %"result_0__508"<FLOAT16,[4096,4096]>
}}
2770 | # Constant_3__507
%"_val_3__507"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2771 | # n0__510
%"size_0__510"<INT64,[2]> ⬅️ ::Cast(%"_val_3__507") {to=7}
2772 | # n1__510
%"view_101__507"<FLOAT16,[unk__702,unk__703]> ⬅️ ::Reshape(%"model_layers_5_input_layernorm_1__496", %"size_0__510")
2773 | # n0__511
%"mm_35__507"<FLOAT16,[unk__702,4096]> ⬅️ ::MatMul(%"view_101__507", %"t_35__507")
2774 | # Constant_6__507
%"_val_6__507"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2775 | # n0__512
%"size_0__512"<INT64,[3]> ⬅️ ::Cast(%"_val_6__507") {to=7}
2776 | # n1__512
%"model_layers_5_self_attn_q_proj_1__506"<FLOAT16,[unk__704,unk__705,unk__706]> ⬅️ ::Reshape(%"mm_35__507", %"size_0__512")
2777 | # n0__515
%"tmp__515"<INT64,[2]> ⬅️ ::Shape(%"model.layers.5.self_attn.k_proj.weight")
2778 | # n1__515
%"rank__514"<INT64,?> ⬅️ ::Size(%"tmp__515")
2779 | # n1__514
%"int64_2__514"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2780 | # n2__514
%"int64_2_cast__514"<INT64,?> ⬅️ ::CastLike(%"int64_2__514", %"rank__514")
2781 | # n3__514
%"cond__514"<BOOL,?> ⬅️ ::Equal(%"rank__514", %"int64_2_cast__514")
2782 | # n4__514
%"t_36__513"<FLOAT16,[unk__707,unk__708]> ⬅️ ::If(%"cond__514") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__514"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__514_223
%"result__514"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.5.self_attn.k_proj.weight") {perm=[1, 0]}
return %"result__514"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__514"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__514_224
%"result_0__514"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.5.self_attn.k_proj.weight")
return %"result_0__514"<FLOAT16,[1024,4096]>
}}
2783 | # Constant_3__513
%"_val_3__513"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2784 | # n0__516
%"size_0__516"<INT64,[2]> ⬅️ ::Cast(%"_val_3__513") {to=7}
2785 | # n1__516
%"view_103__513"<FLOAT16,[unk__709,unk__710]> ⬅️ ::Reshape(%"model_layers_5_input_layernorm_1__496", %"size_0__516")
2786 | # n0__517
%"mm_36__513"<FLOAT16,[unk__709,unk__708]> ⬅️ ::MatMul(%"view_103__513", %"t_36__513")
2787 | # Constant_6__513
%"_val_6__513"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2788 | # n0__518
%"size_0__518"<INT64,[3]> ⬅️ ::Cast(%"_val_6__513") {to=7}
2789 | # n1__518
%"model_layers_5_self_attn_k_proj_1__506"<FLOAT16,[unk__711,unk__712,unk__713]> ⬅️ ::Reshape(%"mm_36__513", %"size_0__518")
2790 | # n0__521
%"tmp__521"<INT64,[2]> ⬅️ ::Shape(%"model.layers.5.self_attn.v_proj.weight")
2791 | # n1__521
%"rank__520"<INT64,?> ⬅️ ::Size(%"tmp__521")
2792 | # n1__520
%"int64_2__520"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
2793 | # n2__520
%"int64_2_cast__520"<INT64,?> ⬅️ ::CastLike(%"int64_2__520", %"rank__520")
2794 | # n3__520
%"cond__520"<BOOL,?> ⬅️ ::Equal(%"rank__520", %"int64_2_cast__520")
2795 | # n4__520
%"t_37__519"<FLOAT16,[unk__714,unk__715]> ⬅️ ::If(%"cond__520") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__520"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__520_225
%"result__520"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.5.self_attn.v_proj.weight") {perm=[1, 0]}
return %"result__520"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__520"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__520_226
%"result_0__520"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.5.self_attn.v_proj.weight")
return %"result_0__520"<FLOAT16,[1024,4096]>
}}
2796 | # Constant_3__519
%"_val_3__519"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
2797 | # n0__522
%"size_0__522"<INT64,[2]> ⬅️ ::Cast(%"_val_3__519") {to=7}
2798 | # n1__522
%"view_105__519"<FLOAT16,[unk__716,unk__717]> ⬅️ ::Reshape(%"model_layers_5_input_layernorm_1__496", %"size_0__522")
2799 | # n0__523
%"mm_37__519"<FLOAT16,[unk__716,unk__715]> ⬅️ ::MatMul(%"view_105__519", %"t_37__519")
2800 | # Constant_6__519
%"_val_6__519"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
2801 | # n0__524
%"size_0__524"<INT64,[3]> ⬅️ ::Cast(%"_val_6__519") {to=7}
2802 | # n1__524
%"model_layers_5_self_attn_v_proj_1__506"<FLOAT16,[unk__718,unk__719,unk__720]> ⬅️ ::Reshape(%"mm_37__519", %"size_0__524")
2803 | # Constant_61__506
%"_val_8__506"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
2804 | # n0__525
%"size_0__525"<INT64,[4]> ⬅️ ::Cast(%"_val_8__506") {to=7}
2805 | # n1__525
%"view_107__506"<FLOAT16,[unk__721,unk__722,unk__723,unk__724]> ⬅️ ::Reshape(%"model_layers_5_self_attn_q_proj_1__506", %"size_0__525")
2806 | # Transpose_63__506
%"transpose_20__506"<FLOAT16,[unk__721,unk__723,unk__722,unk__724]> ⬅️ ::Transpose(%"view_107__506") {perm=[0, 2, 1, 3]}
2807 | # Constant_64__506
%"_val_11__506"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
2808 | # n0__526
%"size_0__526"<INT64,[4]> ⬅️ ::Cast(%"_val_11__506") {to=7}
2809 | # n1__526
%"view_108__506"<FLOAT16,[unk__725,unk__726,unk__727,unk__728]> ⬅️ ::Reshape(%"model_layers_5_self_attn_k_proj_1__506", %"size_0__526")
2810 | # Transpose_66__506
%"transpose_21__506"<FLOAT16,[unk__725,unk__727,unk__726,unk__728]> ⬅️ ::Transpose(%"view_108__506") {perm=[0, 2, 1, 3]}
2811 | # Constant_67__506
%"_val_14__506"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
2812 | # n0__527
%"size_0__527"<INT64,[4]> ⬅️ ::Cast(%"_val_14__506") {to=7}
2813 | # n1__527
%"view_109__506"<FLOAT16,[unk__729,unk__730,unk__731,unk__732]> ⬅️ ::Reshape(%"model_layers_5_self_attn_v_proj_1__506", %"size_0__527")
2814 | # Transpose_69__506
%"model_1_10"<FLOAT16,[1,8,128,128]> ⬅️ ::Transpose(%"view_109__506") {perm=[0, 2, 1, 3]}
2815 | # Constant_8__528
%"_val_1__528"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2816 | # Cast_9__528
%"_val_2__528"<INT64,?> ⬅️ ::Cast(%"_val_1__528") {to=7}
2817 | # Constant_10__528
%"_val_3__528"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2818 | # Reshape_11__528
%"_val_4__528"<INT64,[1]> ⬅️ ::Reshape(%"_val_2__528", %"_val_3__528") {allowzero=0}
2819 | # Constant_12__528
%"_val_5__528"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2820 | # Cast_13__528
%"_val_6__528"<INT64,?> ⬅️ ::Cast(%"_val_5__528") {to=7}
2821 | # Constant_14__528
%"_val_7__528"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2822 | # Reshape_15__528
%"_val_8__528"<INT64,[1]> ⬅️ ::Reshape(%"_val_6__528", %"_val_7__528") {allowzero=0}
2823 | # Constant_16__528
%"_val_9__528"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2824 | # Cast_17__528
%"_val_10__528"<INT64,?> ⬅️ ::Cast(%"_val_9__528") {to=7}
2825 | # Constant_18__528
%"_val_11__528"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2826 | # Reshape_19__528
%"_val_12__528"<INT64,[1]> ⬅️ ::Reshape(%"_val_10__528", %"_val_11__528") {allowzero=0}
2827 | # Constant_20__528
%"_val_13__528"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2828 | # Cast_21__528
%"_val_14__528"<INT64,?> ⬅️ ::Cast(%"_val_13__528") {to=7}
2829 | # Constant_22__528
%"_val_15__528"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2830 | # Reshape_23__528
%"_val_16__528"<INT64,[1]> ⬅️ ::Reshape(%"_val_14__528", %"_val_15__528") {allowzero=0}
2831 | # Slice_24__528
%"model_layers_5_self_attn_rotary_emb_1__506"<FLOAT16,[unk__733,unk__734]> ⬅️ ::Slice(%"model.layers.5.self_attn.rotary_emb.cos_cached", %"_val_4__528", %"_val_8__528", %"_val_12__528", %"_val_16__528")
2832 | # Constant_25__528
%"_val_19__528"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2833 | # Cast_26__528
%"_val_20__528"<INT64,?> ⬅️ ::Cast(%"_val_19__528") {to=7}
2834 | # Constant_27__528
%"_val_21__528"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2835 | # Reshape_28__528
%"_val_22__528"<INT64,[1]> ⬅️ ::Reshape(%"_val_20__528", %"_val_21__528") {allowzero=0}
2836 | # Constant_29__528
%"_val_23__528"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2837 | # Cast_30__528
%"_val_24__528"<INT64,?> ⬅️ ::Cast(%"_val_23__528") {to=7}
2838 | # Constant_31__528
%"_val_25__528"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2839 | # Reshape_32__528
%"_val_26__528"<INT64,[1]> ⬅️ ::Reshape(%"_val_24__528", %"_val_25__528") {allowzero=0}
2840 | # Constant_33__528
%"_val_27__528"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2841 | # Cast_34__528
%"_val_28__528"<INT64,?> ⬅️ ::Cast(%"_val_27__528") {to=7}
2842 | # Constant_35__528
%"_val_29__528"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2843 | # Reshape_36__528
%"_val_30__528"<INT64,[1]> ⬅️ ::Reshape(%"_val_28__528", %"_val_29__528") {allowzero=0}
2844 | # Constant_37__528
%"_val_31__528"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2845 | # Cast_38__528
%"_val_32__528"<INT64,?> ⬅️ ::Cast(%"_val_31__528") {to=7}
2846 | # Constant_39__528
%"_val_33__528"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2847 | # Reshape_40__528
%"_val_34__528"<INT64,[1]> ⬅️ ::Reshape(%"_val_32__528", %"_val_33__528") {allowzero=0}
2848 | # Slice_41__528
%"model_layers_5_self_attn_rotary_emb_1_1__506"<FLOAT16,[unk__735,unk__736]> ⬅️ ::Slice(%"model.layers.5.self_attn.rotary_emb.sin_cached", %"_val_22__528", %"_val_26__528", %"_val_30__528", %"_val_34__528")
2849 | # Transpose_71__506
%"_val_21__506"<FLOAT16,[unk__733,unk__734]> ⬅️ ::Transpose(%"model_layers_5_self_attn_rotary_emb_1__506") {perm=[0, 1]}
2850 | # Max_72__506
%"_val_22__506"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
2851 | # Shape_73__506
%"_val_23__506"<INT64,[2]> ⬅️ ::Shape(%"_val_22__506") {start=0}
2852 | # Expand_74__506
%"_val_24__506"<INT64,[unk__737,unk__738]> ⬅️ ::Expand(%"view__1", %"_val_23__506")
2853 | # Constant_75__506
%"_val_25__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2854 | # Unsqueeze_76__506
%"_val_26__506"<INT64,[unk__737,unk__738,1]> ⬅️ ::Unsqueeze(%"_val_24__506", %"_val_25__506")
2855 | # Concat_77__506
%"_val_27__506"<INT64,[unk__737,unk__738,1]> ⬅️ ::Concat(%"_val_26__506") {axis=-1}
2856 | # GatherND_78__506
%"_val_28__506"<FLOAT16,[unk__737,unk__738,unk__734]> ⬅️ ::GatherND(%"_val_21__506", %"_val_27__506") {batch_dims=0}
2857 | # Transpose_79__506
%"index_10__506"<FLOAT16,[unk__737,unk__738,unk__734]> ⬅️ ::Transpose(%"_val_28__506") {perm=[0, 1, 2]}
2858 | # n0__529
%"dim__529"<INT64,?> ⬅️ ::Constant() {value_int=1}
2859 | # n1__529
%"dim_0__529"<INT64,?> ⬅️ ::Cast(%"dim__529") {to=7}
2860 | # n2__529
%"unsqueeze_21__506"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_10__506", %"dim_0__529")
2861 | # Transpose_81__506
%"_val_31__506"<FLOAT16,[unk__735,unk__736]> ⬅️ ::Transpose(%"model_layers_5_self_attn_rotary_emb_1_1__506") {perm=[0, 1]}
2862 | # Max_82__506
%"_val_32__506"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
2863 | # Shape_83__506
%"_val_33__506"<INT64,[2]> ⬅️ ::Shape(%"_val_32__506") {start=0}
2864 | # Expand_84__506
%"_val_34__506"<INT64,[unk__739,unk__740]> ⬅️ ::Expand(%"view__1", %"_val_33__506")
2865 | # Constant_85__506
%"_val_35__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2866 | # Unsqueeze_86__506
%"_val_36__506"<INT64,[unk__739,unk__740,1]> ⬅️ ::Unsqueeze(%"_val_34__506", %"_val_35__506")
2867 | # Concat_87__506
%"_val_37__506"<INT64,[unk__739,unk__740,1]> ⬅️ ::Concat(%"_val_36__506") {axis=-1}
2868 | # GatherND_88__506
%"_val_38__506"<FLOAT16,[unk__739,unk__740,unk__736]> ⬅️ ::GatherND(%"_val_31__506", %"_val_37__506") {batch_dims=0}
2869 | # Transpose_89__506
%"index_11__506"<FLOAT16,[unk__739,unk__740,unk__736]> ⬅️ ::Transpose(%"_val_38__506") {perm=[0, 1, 2]}
2870 | # n0__530
%"dim__530"<INT64,?> ⬅️ ::Constant() {value_int=1}
2871 | # n1__530
%"dim_0__530"<INT64,?> ⬅️ ::Cast(%"dim__530") {to=7}
2872 | # n2__530
%"unsqueeze_22__506"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_11__506", %"dim_0__530")
2873 | # n0__531
%"mul_52__506"<FLOAT16,?> ⬅️ ::Mul(%"transpose_20__506", %"unsqueeze_21__506")
2874 | # Constant_92__506
%"_val_42__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2875 | # Cast_93__506
%"_val_43__506"<INT64,?> ⬅️ ::Cast(%"_val_42__506") {to=7}
2876 | # Constant_94__506
%"_val_44__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2877 | # Reshape_95__506
%"_val_45__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_43__506", %"_val_44__506") {allowzero=0}
2878 | # Constant_96__506
%"_val_46__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2879 | # Cast_97__506
%"_val_47__506"<INT64,?> ⬅️ ::Cast(%"_val_46__506") {to=7}
2880 | # Constant_98__506
%"_val_48__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2881 | # Reshape_99__506
%"_val_49__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_47__506", %"_val_48__506") {allowzero=0}
2882 | # Constant_100__506
%"_val_50__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2883 | # Cast_101__506
%"_val_51__506"<INT64,?> ⬅️ ::Cast(%"_val_50__506") {to=7}
2884 | # Constant_102__506
%"_val_52__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2885 | # Reshape_103__506
%"_val_53__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_51__506", %"_val_52__506") {allowzero=0}
2886 | # Constant_104__506
%"_val_54__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2887 | # Cast_105__506
%"_val_55__506"<INT64,?> ⬅️ ::Cast(%"_val_54__506") {to=7}
2888 | # Constant_106__506
%"_val_56__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2889 | # Reshape_107__506
%"_val_57__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_55__506", %"_val_56__506") {allowzero=0}
2890 | # Slice_108__506
%"slice_73__506"<FLOAT16,[unk__741,unk__742,unk__743,unk__744]> ⬅️ ::Slice(%"transpose_20__506", %"_val_45__506", %"_val_49__506", %"_val_53__506", %"_val_57__506")
2891 | # Constant_109__506
%"_val_59__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2892 | # Cast_110__506
%"_val_60__506"<INT64,?> ⬅️ ::Cast(%"_val_59__506") {to=7}
2893 | # Constant_111__506
%"_val_61__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2894 | # Reshape_112__506
%"_val_62__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_60__506", %"_val_61__506") {allowzero=0}
2895 | # Constant_113__506
%"_val_63__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2896 | # Cast_114__506
%"_val_64__506"<INT64,?> ⬅️ ::Cast(%"_val_63__506") {to=7}
2897 | # Constant_115__506
%"_val_65__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2898 | # Reshape_116__506
%"_val_66__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_64__506", %"_val_65__506") {allowzero=0}
2899 | # Constant_117__506
%"_val_67__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2900 | # Cast_118__506
%"_val_68__506"<INT64,?> ⬅️ ::Cast(%"_val_67__506") {to=7}
2901 | # Constant_119__506
%"_val_69__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2902 | # Reshape_120__506
%"_val_70__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_68__506", %"_val_69__506") {allowzero=0}
2903 | # Constant_121__506
%"_val_71__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2904 | # Cast_122__506
%"_val_72__506"<INT64,?> ⬅️ ::Cast(%"_val_71__506") {to=7}
2905 | # Constant_123__506
%"_val_73__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2906 | # Reshape_124__506
%"_val_74__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_72__506", %"_val_73__506") {allowzero=0}
2907 | # Slice_125__506
%"slice_74__506"<FLOAT16,[unk__745,unk__746,unk__747,unk__748]> ⬅️ ::Slice(%"transpose_20__506", %"_val_62__506", %"_val_66__506", %"_val_70__506", %"_val_74__506")
2908 | # n0__532
%"neg_10__506"<FLOAT16,[unk__745,unk__746,unk__747,unk__748]> ⬅️ ::Neg(%"slice_74__506")
2909 | # SequenceConstruct_127__506
%"77__506"<Sequence(Tensor(FLOAT16)),[unk__749,unk__750,unk__751,unk__752]> ⬅️ ::SequenceConstruct(%"neg_10__506", %"slice_73__506")
2910 | # n0__533
%"cat_10__506"<FLOAT16,[unk__749,unk__750,unk__751,unk__753]> ⬅️ ::ConcatFromSequence(%"77__506") {axis=-1}
2911 | # n0__534
%"mul_53__506"<FLOAT16,?> ⬅️ ::Mul(%"cat_10__506", %"unsqueeze_22__506")
2912 | # n0__535
%"alpha__535"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
2913 | # n1__535
%"alpha_0__535"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__535", %"mul_53__506")
2914 | # n2__535
%"other_1__535"<FLOAT16,?> ⬅️ ::Mul(%"mul_53__506", %"alpha_0__535")
2915 | # n3__535
%"add_31__506"<FLOAT16,?> ⬅️ ::Add(%"mul_52__506", %"other_1__535")
2916 | # n0__536
%"mul_54__506"<FLOAT16,?> ⬅️ ::Mul(%"transpose_21__506", %"unsqueeze_21__506")
2917 | # Constant_132__506
%"_val_82__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2918 | # Cast_133__506
%"_val_83__506"<INT64,?> ⬅️ ::Cast(%"_val_82__506") {to=7}
2919 | # Constant_134__506
%"_val_84__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2920 | # Reshape_135__506
%"_val_85__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_83__506", %"_val_84__506") {allowzero=0}
2921 | # Constant_136__506
%"_val_86__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2922 | # Cast_137__506
%"_val_87__506"<INT64,?> ⬅️ ::Cast(%"_val_86__506") {to=7}
2923 | # Constant_138__506
%"_val_88__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2924 | # Reshape_139__506
%"_val_89__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_87__506", %"_val_88__506") {allowzero=0}
2925 | # Constant_140__506
%"_val_90__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2926 | # Cast_141__506
%"_val_91__506"<INT64,?> ⬅️ ::Cast(%"_val_90__506") {to=7}
2927 | # Constant_142__506
%"_val_92__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2928 | # Reshape_143__506
%"_val_93__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_91__506", %"_val_92__506") {allowzero=0}
2929 | # Constant_144__506
%"_val_94__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2930 | # Cast_145__506
%"_val_95__506"<INT64,?> ⬅️ ::Cast(%"_val_94__506") {to=7}
2931 | # Constant_146__506
%"_val_96__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2932 | # Reshape_147__506
%"_val_97__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_95__506", %"_val_96__506") {allowzero=0}
2933 | # Slice_148__506
%"slice_75__506"<FLOAT16,[unk__754,unk__755,unk__756,unk__757]> ⬅️ ::Slice(%"transpose_21__506", %"_val_85__506", %"_val_89__506", %"_val_93__506", %"_val_97__506")
2934 | # Constant_149__506
%"_val_99__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2935 | # Cast_150__506
%"_val_100__506"<INT64,?> ⬅️ ::Cast(%"_val_99__506") {to=7}
2936 | # Constant_151__506
%"_val_101__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2937 | # Reshape_152__506
%"_val_102__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_100__506", %"_val_101__506") {allowzero=0}
2938 | # Constant_153__506
%"_val_103__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2939 | # Cast_154__506
%"_val_104__506"<INT64,?> ⬅️ ::Cast(%"_val_103__506") {to=7}
2940 | # Constant_155__506
%"_val_105__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2941 | # Reshape_156__506
%"_val_106__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_104__506", %"_val_105__506") {allowzero=0}
2942 | # Constant_157__506
%"_val_107__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2943 | # Cast_158__506
%"_val_108__506"<INT64,?> ⬅️ ::Cast(%"_val_107__506") {to=7}
2944 | # Constant_159__506
%"_val_109__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2945 | # Reshape_160__506
%"_val_110__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_108__506", %"_val_109__506") {allowzero=0}
2946 | # Constant_161__506
%"_val_111__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2947 | # Cast_162__506
%"_val_112__506"<INT64,?> ⬅️ ::Cast(%"_val_111__506") {to=7}
2948 | # Constant_163__506
%"_val_113__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2949 | # Reshape_164__506
%"_val_114__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_112__506", %"_val_113__506") {allowzero=0}
2950 | # Slice_165__506
%"slice_76__506"<FLOAT16,[unk__758,unk__759,unk__760,unk__761]> ⬅️ ::Slice(%"transpose_21__506", %"_val_102__506", %"_val_106__506", %"_val_110__506", %"_val_114__506")
2951 | # n0__537
%"neg_11__506"<FLOAT16,[unk__758,unk__759,unk__760,unk__761]> ⬅️ ::Neg(%"slice_76__506")
2952 | # SequenceConstruct_167__506
%"117__506"<Sequence(Tensor(FLOAT16)),[unk__762,unk__763,unk__764,unk__765]> ⬅️ ::SequenceConstruct(%"neg_11__506", %"slice_75__506")
2953 | # n0__538
%"cat_11__506"<FLOAT16,[unk__762,unk__763,unk__764,unk__766]> ⬅️ ::ConcatFromSequence(%"117__506") {axis=-1}
2954 | # n0__539
%"mul_55__506"<FLOAT16,?> ⬅️ ::Mul(%"cat_11__506", %"unsqueeze_22__506")
2955 | # n0__540
%"alpha__540"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
2956 | # n1__540
%"alpha_0__540"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__540", %"mul_55__506")
2957 | # n2__540
%"other_1__540"<FLOAT16,?> ⬅️ ::Mul(%"mul_55__506", %"alpha_0__540")
2958 | # n3__540
%"model_1_11"<FLOAT16,[1,8,128,128]> ⬅️ ::Add(%"mul_54__506", %"other_1__540")
2959 | # Constant_171__506
%"_val_121__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2960 | # Cast_172__506
%"_val_122__506"<INT64,?> ⬅️ ::Cast(%"_val_121__506") {to=7}
2961 | # Constant_173__506
%"_val_123__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2962 | # Reshape_174__506
%"_val_124__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_122__506", %"_val_123__506") {allowzero=0}
2963 | # Constant_175__506
%"_val_125__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2964 | # Cast_176__506
%"_val_126__506"<INT64,?> ⬅️ ::Cast(%"_val_125__506") {to=7}
2965 | # Constant_177__506
%"_val_127__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2966 | # Reshape_178__506
%"_val_128__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_126__506", %"_val_127__506") {allowzero=0}
2967 | # Constant_179__506
%"_val_129__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2968 | # Cast_180__506
%"_val_130__506"<INT64,?> ⬅️ ::Cast(%"_val_129__506") {to=7}
2969 | # Constant_181__506
%"_val_131__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2970 | # Reshape_182__506
%"_val_132__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_130__506", %"_val_131__506") {allowzero=0}
2971 | # Constant_183__506
%"_val_133__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2972 | # Cast_184__506
%"_val_134__506"<INT64,?> ⬅️ ::Cast(%"_val_133__506") {to=7}
2973 | # Constant_185__506
%"_val_135__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2974 | # Reshape_186__506
%"_val_136__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_134__506", %"_val_135__506") {allowzero=0}
2975 | # Slice_187__506
%"slice_77__506"<FLOAT16,[unk__767,unk__768,unk__769,unk__770]> ⬅️ ::Slice(%"model_1_11", %"_val_124__506", %"_val_128__506", %"_val_132__506", %"_val_136__506")
2976 | # Constant_188__506
%"_val_138__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2977 | # Cast_189__506
%"_val_139__506"<INT64,?> ⬅️ ::Cast(%"_val_138__506") {to=7}
2978 | # Constant_190__506
%"_val_140__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2979 | # Reshape_191__506
%"_val_141__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_139__506", %"_val_140__506") {allowzero=0}
2980 | # Constant_192__506
%"_val_142__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2981 | # Cast_193__506
%"_val_143__506"<INT64,?> ⬅️ ::Cast(%"_val_142__506") {to=7}
2982 | # Constant_194__506
%"_val_144__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2983 | # Reshape_195__506
%"_val_145__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_143__506", %"_val_144__506") {allowzero=0}
2984 | # Constant_196__506
%"_val_146__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2985 | # Cast_197__506
%"_val_147__506"<INT64,?> ⬅️ ::Cast(%"_val_146__506") {to=7}
2986 | # Constant_198__506
%"_val_148__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2987 | # Reshape_199__506
%"_val_149__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_147__506", %"_val_148__506") {allowzero=0}
2988 | # Constant_200__506
%"_val_150__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2989 | # Cast_201__506
%"_val_151__506"<INT64,?> ⬅️ ::Cast(%"_val_150__506") {to=7}
2990 | # Constant_202__506
%"_val_152__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2991 | # Reshape_203__506
%"_val_153__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_151__506", %"_val_152__506") {allowzero=0}
2992 | # Slice_204__506
%"slice_78__506"<FLOAT16,[unk__771,unk__772,unk__773,unk__774]> ⬅️ ::Slice(%"slice_77__506", %"_val_141__506", %"_val_145__506", %"_val_149__506", %"_val_153__506")
2993 | # n0__541
%"dim__541"<INT64,?> ⬅️ ::Constant() {value_int=2}
2994 | # n1__541
%"dim_0__541"<INT64,?> ⬅️ ::Cast(%"dim__541") {to=7}
2995 | # n2__541
%"unsqueeze_23__506"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_78__506", %"dim_0__541")
2996 | # Constant_206__506
%"_val_156__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
2997 | # Cast_207__506
%"_val_157__506"<INT64,?> ⬅️ ::Cast(%"_val_156__506") {to=7}
2998 | # Constant_208__506
%"_val_158__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
2999 | # Reshape_209__506
%"_val_159__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_157__506", %"_val_158__506") {allowzero=0}
3000 | # Constant_210__506
%"_val_160__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3001 | # Cast_211__506
%"_val_161__506"<INT64,?> ⬅️ ::Cast(%"_val_160__506") {to=7}
3002 | # Constant_212__506
%"_val_162__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3003 | # Reshape_213__506
%"_val_163__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_161__506", %"_val_162__506") {allowzero=0}
3004 | # Constant_214__506
%"_val_164__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3005 | # Cast_215__506
%"_val_165__506"<INT64,?> ⬅️ ::Cast(%"_val_164__506") {to=7}
3006 | # Constant_216__506
%"_val_166__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3007 | # Reshape_217__506
%"_val_167__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_165__506", %"_val_166__506") {allowzero=0}
3008 | # Constant_218__506
%"_val_168__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3009 | # Cast_219__506
%"_val_169__506"<INT64,?> ⬅️ ::Cast(%"_val_168__506") {to=7}
3010 | # Constant_220__506
%"_val_170__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3011 | # Reshape_221__506
%"_val_171__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_169__506", %"_val_170__506") {allowzero=0}
3012 | # Slice_222__506
%"slice_79__506"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_23__506", %"_val_159__506", %"_val_163__506", %"_val_167__506", %"_val_171__506")
3013 | # Constant_223__506
%"_val_173__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3014 | # Cast_224__506
%"_val_174__506"<INT64,?> ⬅️ ::Cast(%"_val_173__506") {to=7}
3015 | # Constant_225__506
%"_val_175__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3016 | # Reshape_226__506
%"_val_176__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_174__506", %"_val_175__506") {allowzero=0}
3017 | # Constant_227__506
%"_val_177__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3018 | # Cast_228__506
%"_val_178__506"<INT64,?> ⬅️ ::Cast(%"_val_177__506") {to=7}
3019 | # Constant_229__506
%"_val_179__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3020 | # Reshape_230__506
%"_val_180__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_178__506", %"_val_179__506") {allowzero=0}
3021 | # Constant_231__506
%"_val_181__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3022 | # Cast_232__506
%"_val_182__506"<INT64,?> ⬅️ ::Cast(%"_val_181__506") {to=7}
3023 | # Constant_233__506
%"_val_183__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3024 | # Reshape_234__506
%"_val_184__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_182__506", %"_val_183__506") {allowzero=0}
3025 | # Constant_235__506
%"_val_185__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3026 | # Cast_236__506
%"_val_186__506"<INT64,?> ⬅️ ::Cast(%"_val_185__506") {to=7}
3027 | # Constant_237__506
%"_val_187__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3028 | # Reshape_238__506
%"_val_188__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_186__506", %"_val_187__506") {allowzero=0}
3029 | # Slice_239__506
%"slice_80__506"<FLOAT16,?> ⬅️ ::Slice(%"slice_79__506", %"_val_176__506", %"_val_180__506", %"_val_184__506", %"_val_188__506")
3030 | # Constant_240__506
%"_val_190__506"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
3031 | # n0__542
%"size_0__542"<INT64,[5]> ⬅️ ::Cast(%"_val_190__506") {to=7}
3032 | # n1__542
%"size_1__542"<INT64,[5]> ⬅️ ::Abs(%"size_0__542")
3033 | # n2__542
%"expand_10__506"<FLOAT16,?> ⬅️ ::Expand(%"slice_80__506", %"size_1__542")
3034 | # n0__543
%"clone_10__506"<FLOAT16,?> ⬅️ ::Identity(%"expand_10__506")
3035 | # Constant_243__506
%"_val_193__506"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
3036 | # n0__544
%"size_0__544"<INT64,[4]> ⬅️ ::Cast(%"_val_193__506") {to=7}
3037 | # n1__544
%"view_110__506"<FLOAT16,[unk__775,unk__776,unk__777,unk__778]> ⬅️ ::Reshape(%"clone_10__506", %"size_0__544")
3038 | # Constant_245__506
%"_val_195__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3039 | # Cast_246__506
%"_val_196__506"<INT64,?> ⬅️ ::Cast(%"_val_195__506") {to=7}
3040 | # Constant_247__506
%"_val_197__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3041 | # Reshape_248__506
%"_val_198__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_196__506", %"_val_197__506") {allowzero=0}
3042 | # Constant_249__506
%"_val_199__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3043 | # Cast_250__506
%"_val_200__506"<INT64,?> ⬅️ ::Cast(%"_val_199__506") {to=7}
3044 | # Constant_251__506
%"_val_201__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3045 | # Reshape_252__506
%"_val_202__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_200__506", %"_val_201__506") {allowzero=0}
3046 | # Constant_253__506
%"_val_203__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3047 | # Cast_254__506
%"_val_204__506"<INT64,?> ⬅️ ::Cast(%"_val_203__506") {to=7}
3048 | # Constant_255__506
%"_val_205__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3049 | # Reshape_256__506
%"_val_206__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_204__506", %"_val_205__506") {allowzero=0}
3050 | # Constant_257__506
%"_val_207__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3051 | # Cast_258__506
%"_val_208__506"<INT64,?> ⬅️ ::Cast(%"_val_207__506") {to=7}
3052 | # Constant_259__506
%"_val_209__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3053 | # Reshape_260__506
%"_val_210__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_208__506", %"_val_209__506") {allowzero=0}
3054 | # Slice_261__506
%"slice_81__506"<FLOAT16,[unk__779,unk__780,unk__781,unk__782]> ⬅️ ::Slice(%"model_1_10", %"_val_198__506", %"_val_202__506", %"_val_206__506", %"_val_210__506")
3055 | # Constant_262__506
%"_val_212__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3056 | # Cast_263__506
%"_val_213__506"<INT64,?> ⬅️ ::Cast(%"_val_212__506") {to=7}
3057 | # Constant_264__506
%"_val_214__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3058 | # Reshape_265__506
%"_val_215__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_213__506", %"_val_214__506") {allowzero=0}
3059 | # Constant_266__506
%"_val_216__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3060 | # Cast_267__506
%"_val_217__506"<INT64,?> ⬅️ ::Cast(%"_val_216__506") {to=7}
3061 | # Constant_268__506
%"_val_218__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3062 | # Reshape_269__506
%"_val_219__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_217__506", %"_val_218__506") {allowzero=0}
3063 | # Constant_270__506
%"_val_220__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3064 | # Cast_271__506
%"_val_221__506"<INT64,?> ⬅️ ::Cast(%"_val_220__506") {to=7}
3065 | # Constant_272__506
%"_val_222__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3066 | # Reshape_273__506
%"_val_223__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_221__506", %"_val_222__506") {allowzero=0}
3067 | # Constant_274__506
%"_val_224__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3068 | # Cast_275__506
%"_val_225__506"<INT64,?> ⬅️ ::Cast(%"_val_224__506") {to=7}
3069 | # Constant_276__506
%"_val_226__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3070 | # Reshape_277__506
%"_val_227__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_225__506", %"_val_226__506") {allowzero=0}
3071 | # Slice_278__506
%"slice_82__506"<FLOAT16,[unk__783,unk__784,unk__785,unk__786]> ⬅️ ::Slice(%"slice_81__506", %"_val_215__506", %"_val_219__506", %"_val_223__506", %"_val_227__506")
3072 | # n0__545
%"dim__545"<INT64,?> ⬅️ ::Constant() {value_int=2}
3073 | # n1__545
%"dim_0__545"<INT64,?> ⬅️ ::Cast(%"dim__545") {to=7}
3074 | # n2__545
%"unsqueeze_24__506"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_82__506", %"dim_0__545")
3075 | # Constant_280__506
%"_val_230__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3076 | # Cast_281__506
%"_val_231__506"<INT64,?> ⬅️ ::Cast(%"_val_230__506") {to=7}
3077 | # Constant_282__506
%"_val_232__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3078 | # Reshape_283__506
%"_val_233__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_231__506", %"_val_232__506") {allowzero=0}
3079 | # Constant_284__506
%"_val_234__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3080 | # Cast_285__506
%"_val_235__506"<INT64,?> ⬅️ ::Cast(%"_val_234__506") {to=7}
3081 | # Constant_286__506
%"_val_236__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3082 | # Reshape_287__506
%"_val_237__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_235__506", %"_val_236__506") {allowzero=0}
3083 | # Constant_288__506
%"_val_238__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3084 | # Cast_289__506
%"_val_239__506"<INT64,?> ⬅️ ::Cast(%"_val_238__506") {to=7}
3085 | # Constant_290__506
%"_val_240__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3086 | # Reshape_291__506
%"_val_241__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_239__506", %"_val_240__506") {allowzero=0}
3087 | # Constant_292__506
%"_val_242__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3088 | # Cast_293__506
%"_val_243__506"<INT64,?> ⬅️ ::Cast(%"_val_242__506") {to=7}
3089 | # Constant_294__506
%"_val_244__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3090 | # Reshape_295__506
%"_val_245__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_243__506", %"_val_244__506") {allowzero=0}
3091 | # Slice_296__506
%"slice_83__506"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_24__506", %"_val_233__506", %"_val_237__506", %"_val_241__506", %"_val_245__506")
3092 | # Constant_297__506
%"_val_247__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3093 | # Cast_298__506
%"_val_248__506"<INT64,?> ⬅️ ::Cast(%"_val_247__506") {to=7}
3094 | # Constant_299__506
%"_val_249__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3095 | # Reshape_300__506
%"_val_250__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_248__506", %"_val_249__506") {allowzero=0}
3096 | # Constant_301__506
%"_val_251__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3097 | # Cast_302__506
%"_val_252__506"<INT64,?> ⬅️ ::Cast(%"_val_251__506") {to=7}
3098 | # Constant_303__506
%"_val_253__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3099 | # Reshape_304__506
%"_val_254__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_252__506", %"_val_253__506") {allowzero=0}
3100 | # Constant_305__506
%"_val_255__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3101 | # Cast_306__506
%"_val_256__506"<INT64,?> ⬅️ ::Cast(%"_val_255__506") {to=7}
3102 | # Constant_307__506
%"_val_257__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3103 | # Reshape_308__506
%"_val_258__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_256__506", %"_val_257__506") {allowzero=0}
3104 | # Constant_309__506
%"_val_259__506"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3105 | # Cast_310__506
%"_val_260__506"<INT64,?> ⬅️ ::Cast(%"_val_259__506") {to=7}
3106 | # Constant_311__506
%"_val_261__506"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3107 | # Reshape_312__506
%"_val_262__506"<INT64,[1]> ⬅️ ::Reshape(%"_val_260__506", %"_val_261__506") {allowzero=0}
3108 | # Slice_313__506
%"slice_84__506"<FLOAT16,?> ⬅️ ::Slice(%"slice_83__506", %"_val_250__506", %"_val_254__506", %"_val_258__506", %"_val_262__506")
3109 | # Constant_314__506
%"_val_264__506"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
3110 | # n0__546
%"size_0__546"<INT64,[5]> ⬅️ ::Cast(%"_val_264__506") {to=7}
3111 | # n1__546
%"size_1__546"<INT64,[5]> ⬅️ ::Abs(%"size_0__546")
3112 | # n2__546
%"expand_11__506"<FLOAT16,?> ⬅️ ::Expand(%"slice_84__506", %"size_1__546")
3113 | # n0__547
%"clone_11__506"<FLOAT16,?> ⬅️ ::Identity(%"expand_11__506")
3114 | # Constant_317__506
%"_val_267__506"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
3115 | # n0__548
%"size_0__548"<INT64,[4]> ⬅️ ::Cast(%"_val_267__506") {to=7}
3116 | # n1__548
%"view_111__506"<FLOAT16,[unk__787,unk__788,unk__789,unk__790]> ⬅️ ::Reshape(%"clone_11__506", %"size_0__548")
3117 | # n0__549
%"tmp__549"<INT64,[unk__791]> ⬅️ ::Shape(%"add_31__506")
3118 | # n1__549
%"int64_m1__549"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_m1')}
3119 | # n2__549
%"tmp_subscripted__549"<INT64,?> ⬅️ ::Gather(%"tmp__549", %"int64_m1__549") {axis=0}
3120 | # n3__549
%"embedding_size__549"<FLOAT16,?> ⬅️ ::CastLike(%"tmp_subscripted__549", %"add_31__506")
3121 | # n4__549
%"const__549"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
3122 | # n5__549
%"tmp_0__549"<FLOAT16,?> ⬅️ ::Sqrt(%"embedding_size__549")
3123 | # n6__549
%"const_cast__549"<FLOAT16,?> ⬅️ ::CastLike(%"const__549", %"tmp_0__549")
3124 | # n7__549
%"_val_269__506"<FLOAT16,?> ⬅️ ::Div(%"const_cast__549", %"tmp_0__549")
3125 | # CastLike_320__506
%"_val_270__506"<FLOAT16,?> ⬅️ ::CastLike(%"_val_269__506", %"add_31__506")
3126 | # n0__550
%"tmp__550"<INT64,[unk__792]> ⬅️ ::Shape(%"add_31__506")
3127 | # n1__550
%"int64_0_1d__550"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
3128 | # n2__550
%"int64_1_1d__550"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
3129 | # n3__550
%"int64_m2_1d__550"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
3130 | # n4__550
%"int64_m1_1d__550"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
3131 | # n5__550
%"target_length__550"<INT64,[unk__793]> ⬅️ ::Slice(%"tmp__550", %"int64_m2_1d__550", %"int64_m1_1d__550", %"int64_0_1d__550", %"int64_1_1d__550")
3132 | # n6__550
%"tmp_0__550"<INT64,[4]> ⬅️ ::Shape(%"view_110__506")
3133 | # n7__550
%"int64_0_1d_1__550"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
3134 | # n8__550
%"int64_1_1d_2__550"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
3135 | # n9__550
%"int64_m2_1d_3__550"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_3')}
3136 | # n10__550
%"int64_m1_1d_4__550"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_4')}
3137 | # n11__550
%"source_length__550"<INT64,[1]> ⬅️ ::Slice(%"tmp_0__550", %"int64_m2_1d_3__550", %"int64_m1_1d_4__550", %"int64_0_1d_1__550", %"int64_1_1d_2__550")
3138 | # n12__550
%"size__550"<INT64,[unk__794]> ⬅️ ::Concat(%"target_length__550", %"source_length__550") {axis=0}
3139 | # n13__550
%"const__550"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
3140 | # n14__550
%"attn_mask__550"<FLOAT,?> ⬅️ ::Expand(%"const__550", %"size__550")
3141 | # n15__550
%"attn_mask_5__550"<FLOAT,?> ⬅️ ::Trilu(%"attn_mask__550") {upper=0}
3142 | # n16__550
%"const_6__550"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_6')}
3143 | # n17__550
%"const_6_cast__550"<FLOAT,?> ⬅️ ::CastLike(%"const_6__550", %"attn_mask_5__550")
3144 | # n18__550
%"tmp_7__550"<BOOL,?> ⬅️ ::Equal(%"attn_mask_5__550", %"const_6_cast__550")
3145 | # n19__550
%"tmp_8__550"<FLOAT,?> ⬅️ ::Constant() {value_float=-inf}
3146 | # n20__550
%"const_9__550"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
3147 | # n21__550
%"const_9_cast__550"<FLOAT,?> ⬅️ ::CastLike(%"const_9__550", %"tmp_8__550")
3148 | # n22__550
%"attn_mask_10__550"<FLOAT,?> ⬅️ ::Where(%"tmp_7__550", %"tmp_8__550", %"const_9_cast__550")
3149 | # n23__550
%"_val_271__506"<FLOAT16,?> ⬅️ ::CastLike(%"attn_mask_10__550", %"add_31__506")
3150 | # n0__551
%"key_shape__551"<INT64,[4]> ⬅️ ::Shape(%"view_110__506")
3151 | # n1__551
%"int64_0_1d__551"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
3152 | # n2__551
%"int64_1_1d__551"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
3153 | # n3__551
%"int64_m1_1d__551"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
3154 | # n4__551
%"int64_9223372036854775807_1d__551"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_9223372036854775807_1d')}
3155 | # n5__551
%"key_last_dim__551"<INT64,[1]> ⬅️ ::Slice(%"key_shape__551", %"int64_m1_1d__551", %"int64_9223372036854775807_1d__551", %"int64_0_1d__551", %"int64_1_1d__551")
3156 | # n6__551
%"int64_0_1d_0__551"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_0')}
3157 | # n7__551
%"int64_1_1d_1__551"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_1')}
3158 | # n8__551
%"int64_m2_1d__551"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
3159 | # n9__551
%"int64_m1_1d_2__551"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_2')}
3160 | # n10__551
%"key_second_last_dim__551"<INT64,[1]> ⬅️ ::Slice(%"key_shape__551", %"int64_m2_1d__551", %"int64_m1_1d_2__551", %"int64_0_1d_0__551", %"int64_1_1d_1__551")
3161 | # n11__551
%"int64_0_1d_3__551"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
3162 | # n12__551
%"int64_1_1d_4__551"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
3163 | # n13__551
%"int64_m2_1d_5__551"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_5')}
3164 | # n14__551
%"key_first_dims__551"<INT64,[2]> ⬅️ ::Slice(%"key_shape__551", %"int64_0_1d_3__551", %"int64_m2_1d_5__551", %"int64_0_1d_3__551", %"int64_1_1d_4__551")
3165 | # n15__551
%"tmp__551"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3166 | # n16__551
%"key_squeezed_shape__551"<INT64,[3]> ⬅️ ::Concat(%"tmp__551", %"key_second_last_dim__551", %"key_last_dim__551") {axis=0}
3167 | # n17__551
%"key_squeezed__551"<FLOAT16,[unk__795,unk__796,unk__797]> ⬅️ ::Reshape(%"view_110__506", %"key_squeezed_shape__551")
3168 | # n18__551
%"key_squeezed_transposed__551"<FLOAT16,[unk__795,unk__797,unk__796]> ⬅️ ::Transpose(%"key_squeezed__551") {perm=[0, 2, 1]}
3169 | # n19__551
%"key_transposed_shape__551"<INT64,[4]> ⬅️ ::Concat(%"key_first_dims__551", %"key_last_dim__551", %"key_second_last_dim__551") {axis=0}
3170 | # n20__551
%"key_transposed__551"<FLOAT16,[unk__798,unk__799,unk__800,unk__801]> ⬅️ ::Reshape(%"key_squeezed_transposed__551", %"key_transposed_shape__551")
3171 | # n21__551
%"tmp_6__551"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__506")
3172 | # n22__551
%"query_scaled__551"<FLOAT16,?> ⬅️ ::Mul(%"add_31__506", %"tmp_6__551")
3173 | # n23__551
%"tmp_7__551"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__506")
3174 | # n24__551
%"key_transposed_scaled__551"<FLOAT16,[unk__798,unk__799,unk__800,unk__801]> ⬅️ ::Mul(%"key_transposed__551", %"tmp_7__551")
3175 | # n25__551
%"tmp_8__551"<FLOAT16,?> ⬅️ ::MatMul(%"query_scaled__551", %"key_transposed_scaled__551")
3176 | # n26__551
%"tmp_9__551"<FLOAT16,?> ⬅️ ::Add(%"tmp_8__551", %"_val_271__506")
3177 | # n27__551
%"attn_weight__551"<FLOAT16,?> ⬅️ ::Softmax(%"tmp_9__551") {axis=-1}
3178 | # n28__551
%"dropout_p__551"<FLOAT,?> ⬅️ ::Constant() {value_float=0.0}
3179 | # n29__551
%"attn_weight_10__551"<FLOAT16,?>, %"___551"<BOOL,?> ⬅️ ::Dropout(%"attn_weight__551", %"dropout_p__551")
3180 | # n30__551
%"_scaled_dot_product_efficient_attention_5__506"<FLOAT16,?> ⬅️ ::MatMul(%"attn_weight_10__551", %"view_111__506")
3181 | # n0__552
%"query_0__552"<FLOAT16,?> ⬅️ ::Transpose(%"add_31__506") {perm=[0, 2, 1, 3]}
3182 | # n1__552
%"query_shape__552"<INT64,[unk__802]> ⬅️ ::Shape(%"query_0__552")
3183 | # n2__552
%"int64_0_1d__552"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
3184 | # n3__552
%"int64_1_1d__552"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
3185 | # n4__552
%"query_first_dims__552"<INT64,[unk__803]> ⬅️ ::Slice(%"query_shape__552", %"int64_0_1d__552", %"int64_1_1d__552", %"int64_0_1d__552", %"int64_1_1d__552")
3186 | # n5__552
%"int64_0_1d_1__552"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
3187 | # n6__552
%"int64_1_1d_2__552"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
3188 | # n7__552
%"int64_2_1d__552"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_2_1d')}
3189 | # n8__552
%"query_second_dims__552"<INT64,[unk__804]> ⬅️ ::Slice(%"query_shape__552", %"int64_1_1d_2__552", %"int64_2_1d__552", %"int64_0_1d_1__552", %"int64_1_1d_2__552")
3190 | # n9__552
%"int64_0_1d_3__552"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
3191 | # n10__552
%"int64_1_1d_4__552"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
3192 | # n11__552
%"int64_m2_1d__552"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
3193 | # n12__552
%"int64_m1_1d__552"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
3194 | # n13__552
%"num_heads__552"<INT64,[unk__805]> ⬅️ ::Slice(%"query_shape__552", %"int64_m2_1d__552", %"int64_m1_1d__552", %"int64_0_1d_3__552", %"int64_1_1d_4__552")
3195 | # n14__552
%"compute_log_sumexp__552"<INT64,?> ⬅️ ::Constant() {value_int=0}
3196 | # n15__552
%"compute_log_sumexp_as_bool__552"<BOOL,?> ⬅️ ::Cast(%"compute_log_sumexp__552") {to=9}
3197 | # n16__552
%"_scaled_dot_product_efficient_attention_5_1__506"<FLOAT,?> ⬅️ ::If(%"compute_log_sumexp_as_bool__552") {then_branch=
graph(
name=thenGraph_14,
inputs=(
),
outputs=(
%"logsum_exp__552"<FLOAT,?>
),
) {
0 | # n0__552_227
%"tmp__552"<FLOAT,[unk__804]> ⬅️ ::Cast(%"query_second_dims__552") {to=1}
1 | # n1__552_228
%"const__552"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2 | # n2__552_229
%"const_cast__552"<FLOAT,?> ⬅️ ::CastLike(%"const__552", %"tmp__552")
3 | # n3__552_230
%"tmp_5__552"<FLOAT,[unk__804]> ⬅️ ::Div(%"tmp__552", %"const_cast__552")
4 | # n4__552_231
%"tmp_6__552"<FLOAT,[unk__804]> ⬅️ ::Ceil(%"tmp_5__552")
5 | # n5__552_232
%"const_7__552"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_7')}
6 | # n6__552_233
%"const_7_cast__552"<FLOAT,?> ⬅️ ::CastLike(%"const_7__552", %"tmp_6__552")
7 | # n7__552_234
%"tmp_8__552"<FLOAT,[unk__804]> ⬅️ ::Mul(%"tmp_6__552", %"const_7_cast__552")
8 | # n8__552_235
%"logsumexp_dim__552"<INT64,[unk__804]> ⬅️ ::Cast(%"tmp_8__552") {to=7}
9 | # n9__552_236
%"const_9__552"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
10 | # n10__552_237
%"tmp_10__552"<INT64,[unk__806]> ⬅️ ::Concat(%"query_first_dims__552", %"num_heads__552", %"logsumexp_dim__552") {axis=0}
11 | # n11__552_238
%"logsum_exp__552"<FLOAT,?> ⬅️ ::Expand(%"const_9__552", %"tmp_10__552")
return %"logsum_exp__552"<FLOAT,?>
}, else_branch=
graph(
name=elseGraph_14,
inputs=(
),
outputs=(
%"logsum_exp_14__552"<FLOAT,?>
),
) {
0 | # n0__552_239
%"const_11__552"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_11')}
1 | # n1__552_240
%"int64_0_1d_12__552"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_12')}
2 | # n2__552_241
%"int64_0_1d_12_cast__552"<INT64,[1]> ⬅️ ::CastLike(%"int64_0_1d_12__552", %"num_heads__552")
3 | # n3__552_242
%"tmp_13__552"<INT64,[unk__807]> ⬅️ ::Concat(%"query_first_dims__552", %"num_heads__552", %"int64_0_1d_12_cast__552") {axis=0}
4 | # n4__552_243
%"logsum_exp_14__552"<FLOAT,?> ⬅️ ::Expand(%"const_11__552", %"tmp_13__552")
return %"logsum_exp_14__552"<FLOAT,?>
}}
3198 | # n17__552
%"tmp_16__552"<INT64,[0]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[0]>('Empty_INTS')}
3199 | # n18__552
%"tmp_17__552"<FLOAT,?> ⬅️ ::ConstantOfShape(%"tmp_16__552")
3200 | # n19__552
%"_scaled_dot_product_efficient_attention_5_3__506"<INT64,?> ⬅️ ::Cast(%"tmp_17__552") {to=7}
3201 | # Transpose_324__506
%"transpose_23__506"<FLOAT16,?> ⬅️ ::Transpose(%"_scaled_dot_product_efficient_attention_5__506") {perm=[0, 2, 1, 3]}
3202 | # Constant_325__506
%"_val_276__506"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3203 | # n0__553
%"size_0__553"<INT64,[3]> ⬅️ ::Cast(%"_val_276__506") {to=7}
3204 | # n1__553
%"view_112__506"<FLOAT16,[unk__808,unk__809,unk__810]> ⬅️ ::Reshape(%"transpose_23__506", %"size_0__553")
3205 | # n0__556
%"tmp__556"<INT64,[2]> ⬅️ ::Shape(%"model.layers.5.self_attn.o_proj.weight")
3206 | # n1__556
%"rank__555"<INT64,?> ⬅️ ::Size(%"tmp__556")
3207 | # n1__555
%"int64_2__555"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3208 | # n2__555
%"int64_2_cast__555"<INT64,?> ⬅️ ::CastLike(%"int64_2__555", %"rank__555")
3209 | # n3__555
%"cond__555"<BOOL,?> ⬅️ ::Equal(%"rank__555", %"int64_2_cast__555")
3210 | # n4__555
%"t_38__554"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__555") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__555"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__555_244
%"result__555"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.5.self_attn.o_proj.weight") {perm=[1, 0]}
return %"result__555"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__555"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__555_245
%"result_0__555"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.5.self_attn.o_proj.weight")
return %"result_0__555"<FLOAT16,[4096,4096]>
}}
3211 | # Constant_3__554
%"_val_3__554"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3212 | # n0__557
%"size_0__557"<INT64,[2]> ⬅️ ::Cast(%"_val_3__554") {to=7}
3213 | # n1__557
%"view_113__554"<FLOAT16,[unk__811,unk__812]> ⬅️ ::Reshape(%"view_112__506", %"size_0__557")
3214 | # n0__558
%"mm_38__554"<FLOAT16,[unk__811,4096]> ⬅️ ::MatMul(%"view_113__554", %"t_38__554")
3215 | # Constant_6__554
%"_val_6__554"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3216 | # n0__559
%"size_0__559"<INT64,[3]> ⬅️ ::Cast(%"_val_6__554") {to=7}
3217 | # n1__559
%"model_layers_5_self_attn_1_2__496"<FLOAT16,[unk__813,unk__814,unk__815]> ⬅️ ::Reshape(%"mm_38__554", %"size_0__559")
3218 | # n0__560
%"alpha__560"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
3219 | # n1__560
%"alpha_0__560"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__560", %"model_layers_5_self_attn_1_2__496")
3220 | # n2__560
%"other_1__560"<FLOAT16,[unk__813,unk__814,unk__815]> ⬅️ ::Mul(%"model_layers_5_self_attn_1_2__496", %"alpha_0__560")
3221 | # n3__560
%"add_33__496"<FLOAT16,[unk__816,128,4096]> ⬅️ ::Add(%"model_layers_4_1_2__1", %"other_1__560")
3222 | # Cast_3__561
%"_to_copy_32__561"<FLOAT,[unk__816,128,4096]> ⬅️ ::Cast(%"add_33__496") {to=1}
3223 | # Constant_4__561
%"_val_2__561"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3224 | # Cast_5__561
%"scalar_tensor_default_11__561"<FLOAT,?> ⬅️ ::Cast(%"_val_2__561") {to=1}
3225 | # n0__562
%"pow_12__561"<FLOAT,[unk__816,128,4096]> ⬅️ ::Pow(%"_to_copy_32__561", %"scalar_tensor_default_11__561")
3226 | # Constant_7__561
%"_val_5__561"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
3227 | # n0__564
%"tmp__564"<INT64,[3]> ⬅️ ::Shape(%"pow_12__561")
3228 | # n1__564
%"tmp_0__564"<INT64,?> ⬅️ ::Size(%"tmp__564")
3229 | # n2__564
%"tmp_1__564"<INT64,?> ⬅️ ::Constant() {value_int=0}
3230 | # n3__564
%"cond__563"<BOOL,?> ⬅️ ::Equal(%"tmp_0__564", %"tmp_1__564")
3231 | # n1__563
%"mean_11__561"<FLOAT,?> ⬅️ ::If(%"cond__563") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__563"<FLOAT,[unk__816,128,4096]>
),
) {
0 | # n0__563_246
%"result__563"<FLOAT,[unk__816,128,4096]> ⬅️ ::Identity(%"pow_12__561")
return %"result__563"<FLOAT,[unk__816,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__563"<FLOAT,?>
),
) {
0 | # n0__565
%"tmp__565"<INT64,[1]> ⬅️ ::Shape(%"_val_5__561")
1 | # n1__565
%"tmp_0__565"<INT64,?> ⬅️ ::Size(%"tmp__565")
2 | # n2__565
%"tmp_1__565"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__565
%"cond_0__563"<BOOL,?> ⬅️ ::Equal(%"tmp_0__565", %"tmp_1__565")
4 | # n1__563_248
%"dim_3__563"<INT64,?> ⬅️ ::If(%"cond_0__563") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__563"<INT64,[1,1]>
),
) {
0 | # n0__563_249
%"int64_0__563"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__563_250
%"dim_1__563"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__561", %"int64_0__563")
return %"dim_1__563"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__563"<INT64,[1]>
),
) {
0 | # n0__563_251
%"dim_2__563"<INT64,[1]> ⬅️ ::Identity(%"_val_5__561")
return %"dim_2__563"<INT64,[1]>
}}
5 | # n2__563
%"result_4__563"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_12__561", %"dim_3__563") {keepdims=1}
return %"result_4__563"<FLOAT,?>
}}
3232 | # Constant_9__561
%"_val_7__561"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
3233 | # n0__566
%"alpha__566"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
3234 | # n1__566
%"alpha_0__566"<FLOAT,?> ⬅️ ::CastLike(%"alpha__566", %"_val_7__561")
3235 | # n2__566
%"other_1__566"<FLOAT,?> ⬅️ ::Mul(%"_val_7__561", %"alpha_0__566")
3236 | # n3__566
%"add_34__561"<FLOAT,?> ⬅️ ::Add(%"mean_11__561", %"other_1__566")
3237 | # n0__567
%"tmp__567"<FLOAT,?> ⬅️ ::Sqrt(%"add_34__561")
3238 | # n1__567
%"rsqrt_11__561"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__567")
3239 | # n0__568
%"mul_56__561"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_32__561", %"rsqrt_11__561")
3240 | # Cast_13__561
%"_to_copy_33__561"<FLOAT16,?> ⬅️ ::Cast(%"mul_56__561") {to=10}
3241 | # n0__569
%"model_layers_5_post_attention_layernorm_1__496"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.5.post_attention_layernorm.weight", %"_to_copy_33__561")
3242 | # n0__573
%"tmp__573"<INT64,[2]> ⬅️ ::Shape(%"model.layers.5.mlp.gate_proj.weight")
3243 | # n1__573
%"rank__572"<INT64,?> ⬅️ ::Size(%"tmp__573")
3244 | # n1__572
%"int64_2__572"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3245 | # n2__572
%"int64_2_cast__572"<INT64,?> ⬅️ ::CastLike(%"int64_2__572", %"rank__572")
3246 | # n3__572
%"cond__572"<BOOL,?> ⬅️ ::Equal(%"rank__572", %"int64_2_cast__572")
3247 | # n4__572
%"t_39__571"<FLOAT16,[unk__817,unk__818]> ⬅️ ::If(%"cond__572") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__572"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__572_252
%"result__572"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.5.mlp.gate_proj.weight") {perm=[1, 0]}
return %"result__572"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__572"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__572_253
%"result_0__572"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.5.mlp.gate_proj.weight")
return %"result_0__572"<FLOAT16,[14336,4096]>
}}
3248 | # Constant_3__571
%"_val_3__571"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3249 | # n0__574
%"size_0__574"<INT64,[2]> ⬅️ ::Cast(%"_val_3__571") {to=7}
3250 | # n1__574
%"view_115__571"<FLOAT16,[unk__819,unk__820]> ⬅️ ::Reshape(%"model_layers_5_post_attention_layernorm_1__496", %"size_0__574")
3251 | # n0__575
%"mm_39__571"<FLOAT16,[unk__819,unk__818]> ⬅️ ::MatMul(%"view_115__571", %"t_39__571")
3252 | # Constant_6__571
%"_val_6__571"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3253 | # n0__576
%"size_0__576"<INT64,[3]> ⬅️ ::Cast(%"_val_6__571") {to=7}
3254 | # n1__576
%"model_layers_5_mlp_gate_proj_1__570"<FLOAT16,[unk__821,unk__822,unk__823]> ⬅️ ::Reshape(%"mm_39__571", %"size_0__576")
3255 | # Cast_0__577
%"_to_copy_34__577"<FLOAT,[unk__821,unk__822,unk__823]> ⬅️ ::Cast(%"model_layers_5_mlp_gate_proj_1__570") {to=1}
3256 | # n0__578
%"sigmoid_5__577"<FLOAT,[unk__821,unk__822,unk__823]> ⬅️ ::Sigmoid(%"_to_copy_34__577")
3257 | # n0__579
%"mul_58__577"<FLOAT,[unk__821,unk__822,unk__823]> ⬅️ ::Mul(%"_to_copy_34__577", %"sigmoid_5__577")
3258 | # Cast_3__577
%"model_layers_5_mlp_act_fn_1__570"<FLOAT16,[unk__821,unk__822,unk__823]> ⬅️ ::Cast(%"mul_58__577") {to=10}
3259 | # n0__582
%"tmp__582"<INT64,[2]> ⬅️ ::Shape(%"model.layers.5.mlp.up_proj.weight")
3260 | # n1__582
%"rank__581"<INT64,?> ⬅️ ::Size(%"tmp__582")
3261 | # n1__581
%"int64_2__581"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3262 | # n2__581
%"int64_2_cast__581"<INT64,?> ⬅️ ::CastLike(%"int64_2__581", %"rank__581")
3263 | # n3__581
%"cond__581"<BOOL,?> ⬅️ ::Equal(%"rank__581", %"int64_2_cast__581")
3264 | # n4__581
%"t_40__580"<FLOAT16,[unk__824,unk__825]> ⬅️ ::If(%"cond__581") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__581"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__581_254
%"result__581"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.5.mlp.up_proj.weight") {perm=[1, 0]}
return %"result__581"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__581"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__581_255
%"result_0__581"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.5.mlp.up_proj.weight")
return %"result_0__581"<FLOAT16,[14336,4096]>
}}
3265 | # Constant_3__580
%"_val_3__580"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3266 | # n0__583
%"size_0__583"<INT64,[2]> ⬅️ ::Cast(%"_val_3__580") {to=7}
3267 | # n1__583
%"view_117__580"<FLOAT16,[unk__826,unk__827]> ⬅️ ::Reshape(%"model_layers_5_post_attention_layernorm_1__496", %"size_0__583")
3268 | # n0__584
%"mm_40__580"<FLOAT16,[unk__826,unk__825]> ⬅️ ::MatMul(%"view_117__580", %"t_40__580")
3269 | # Constant_6__580
%"_val_6__580"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3270 | # n0__585
%"size_0__585"<INT64,[3]> ⬅️ ::Cast(%"_val_6__580") {to=7}
3271 | # n1__585
%"model_layers_5_mlp_up_proj_1__570"<FLOAT16,[unk__828,unk__829,unk__830]> ⬅️ ::Reshape(%"mm_40__580", %"size_0__585")
3272 | # n0__586
%"mul_59__570"<FLOAT16,[unk__831,unk__832,unk__833]> ⬅️ ::Mul(%"model_layers_5_mlp_act_fn_1__570", %"model_layers_5_mlp_up_proj_1__570")
3273 | # n0__589
%"tmp__589"<INT64,[2]> ⬅️ ::Shape(%"model.layers.5.mlp.down_proj.weight")
3274 | # n1__589
%"rank__588"<INT64,?> ⬅️ ::Size(%"tmp__589")
3275 | # n1__588
%"int64_2__588"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3276 | # n2__588
%"int64_2_cast__588"<INT64,?> ⬅️ ::CastLike(%"int64_2__588", %"rank__588")
3277 | # n3__588
%"cond__588"<BOOL,?> ⬅️ ::Equal(%"rank__588", %"int64_2_cast__588")
3278 | # n4__588
%"t_41__587"<FLOAT16,[unk__834,unk__835]> ⬅️ ::If(%"cond__588") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__588"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__588_256
%"result__588"<FLOAT16,[14336,4096]> ⬅️ ::Transpose(%"model.layers.5.mlp.down_proj.weight") {perm=[1, 0]}
return %"result__588"<FLOAT16,[14336,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__588"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__588_257
%"result_0__588"<FLOAT16,[4096,14336]> ⬅️ ::Identity(%"model.layers.5.mlp.down_proj.weight")
return %"result_0__588"<FLOAT16,[4096,14336]>
}}
3279 | # Constant_3__587
%"_val_3__587"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3280 | # n0__590
%"size_0__590"<INT64,[2]> ⬅️ ::Cast(%"_val_3__587") {to=7}
3281 | # n1__590
%"view_119__587"<FLOAT16,[unk__836,unk__837]> ⬅️ ::Reshape(%"mul_59__570", %"size_0__590")
3282 | # n0__591
%"mm_41__587"<FLOAT16,[unk__836,unk__835]> ⬅️ ::MatMul(%"view_119__587", %"t_41__587")
3283 | # Constant_6__587
%"_val_6__587"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3284 | # n0__592
%"size_0__592"<INT64,[3]> ⬅️ ::Cast(%"_val_6__587") {to=7}
3285 | # n1__592
%"model_layers_5_mlp_1__496"<FLOAT16,[unk__838,unk__839,unk__840]> ⬅️ ::Reshape(%"mm_41__587", %"size_0__592")
3286 | # n0__593
%"alpha__593"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
3287 | # n1__593
%"alpha_0__593"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__593", %"model_layers_5_mlp_1__496")
3288 | # n2__593
%"other_1__593"<FLOAT16,[unk__838,unk__839,unk__840]> ⬅️ ::Mul(%"model_layers_5_mlp_1__496", %"alpha_0__593")
3289 | # n3__593
%"model_layers_5_1_2__1"<FLOAT16,[unk__841,128,4096]> ⬅️ ::Add(%"add_33__496", %"other_1__593")
3290 | # Cast_3__595
%"_to_copy_36__595"<FLOAT,[unk__841,128,4096]> ⬅️ ::Cast(%"model_layers_5_1_2__1") {to=1}
3291 | # Constant_4__595
%"_val_2__595"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3292 | # Cast_5__595
%"scalar_tensor_default_12__595"<FLOAT,?> ⬅️ ::Cast(%"_val_2__595") {to=1}
3293 | # n0__596
%"pow_13__595"<FLOAT,[unk__841,128,4096]> ⬅️ ::Pow(%"_to_copy_36__595", %"scalar_tensor_default_12__595")
3294 | # Constant_7__595
%"_val_5__595"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
3295 | # n0__598
%"tmp__598"<INT64,[3]> ⬅️ ::Shape(%"pow_13__595")
3296 | # n1__598
%"tmp_0__598"<INT64,?> ⬅️ ::Size(%"tmp__598")
3297 | # n2__598
%"tmp_1__598"<INT64,?> ⬅️ ::Constant() {value_int=0}
3298 | # n3__598
%"cond__597"<BOOL,?> ⬅️ ::Equal(%"tmp_0__598", %"tmp_1__598")
3299 | # n1__597
%"mean_12__595"<FLOAT,?> ⬅️ ::If(%"cond__597") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__597"<FLOAT,[unk__841,128,4096]>
),
) {
0 | # n0__597_258
%"result__597"<FLOAT,[unk__841,128,4096]> ⬅️ ::Identity(%"pow_13__595")
return %"result__597"<FLOAT,[unk__841,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__597"<FLOAT,?>
),
) {
0 | # n0__599
%"tmp__599"<INT64,[1]> ⬅️ ::Shape(%"_val_5__595")
1 | # n1__599
%"tmp_0__599"<INT64,?> ⬅️ ::Size(%"tmp__599")
2 | # n2__599
%"tmp_1__599"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__599
%"cond_0__597"<BOOL,?> ⬅️ ::Equal(%"tmp_0__599", %"tmp_1__599")
4 | # n1__597_260
%"dim_3__597"<INT64,?> ⬅️ ::If(%"cond_0__597") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__597"<INT64,[1,1]>
),
) {
0 | # n0__597_261
%"int64_0__597"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__597_262
%"dim_1__597"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__595", %"int64_0__597")
return %"dim_1__597"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__597"<INT64,[1]>
),
) {
0 | # n0__597_263
%"dim_2__597"<INT64,[1]> ⬅️ ::Identity(%"_val_5__595")
return %"dim_2__597"<INT64,[1]>
}}
5 | # n2__597
%"result_4__597"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_13__595", %"dim_3__597") {keepdims=1}
return %"result_4__597"<FLOAT,?>
}}
3300 | # Constant_9__595
%"_val_7__595"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
3301 | # n0__600
%"alpha__600"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
3302 | # n1__600
%"alpha_0__600"<FLOAT,?> ⬅️ ::CastLike(%"alpha__600", %"_val_7__595")
3303 | # n2__600
%"other_1__600"<FLOAT,?> ⬅️ ::Mul(%"_val_7__595", %"alpha_0__600")
3304 | # n3__600
%"add_36__595"<FLOAT,?> ⬅️ ::Add(%"mean_12__595", %"other_1__600")
3305 | # n0__601
%"tmp__601"<FLOAT,?> ⬅️ ::Sqrt(%"add_36__595")
3306 | # n1__601
%"rsqrt_12__595"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__601")
3307 | # n0__602
%"mul_60__595"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_36__595", %"rsqrt_12__595")
3308 | # Cast_13__595
%"_to_copy_37__595"<FLOAT16,?> ⬅️ ::Cast(%"mul_60__595") {to=10}
3309 | # n0__603
%"model_layers_6_input_layernorm_1__594"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.6.input_layernorm.weight", %"_to_copy_37__595")
3310 | # n0__607
%"tmp__607"<INT64,[2]> ⬅️ ::Shape(%"model.layers.6.self_attn.q_proj.weight")
3311 | # n1__607
%"rank__606"<INT64,?> ⬅️ ::Size(%"tmp__607")
3312 | # n1__606
%"int64_2__606"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3313 | # n2__606
%"int64_2_cast__606"<INT64,?> ⬅️ ::CastLike(%"int64_2__606", %"rank__606")
3314 | # n3__606
%"cond__606"<BOOL,?> ⬅️ ::Equal(%"rank__606", %"int64_2_cast__606")
3315 | # n4__606
%"t_42__605"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__606") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__606"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__606_264
%"result__606"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.6.self_attn.q_proj.weight") {perm=[1, 0]}
return %"result__606"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__606"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__606_265
%"result_0__606"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.6.self_attn.q_proj.weight")
return %"result_0__606"<FLOAT16,[4096,4096]>
}}
3316 | # Constant_3__605
%"_val_3__605"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3317 | # n0__608
%"size_0__608"<INT64,[2]> ⬅️ ::Cast(%"_val_3__605") {to=7}
3318 | # n1__608
%"view_121__605"<FLOAT16,[unk__842,unk__843]> ⬅️ ::Reshape(%"model_layers_6_input_layernorm_1__594", %"size_0__608")
3319 | # n0__609
%"mm_42__605"<FLOAT16,[unk__842,4096]> ⬅️ ::MatMul(%"view_121__605", %"t_42__605")
3320 | # Constant_6__605
%"_val_6__605"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3321 | # n0__610
%"size_0__610"<INT64,[3]> ⬅️ ::Cast(%"_val_6__605") {to=7}
3322 | # n1__610
%"model_layers_6_self_attn_q_proj_1__604"<FLOAT16,[unk__844,unk__845,unk__846]> ⬅️ ::Reshape(%"mm_42__605", %"size_0__610")
3323 | # n0__613
%"tmp__613"<INT64,[2]> ⬅️ ::Shape(%"model.layers.6.self_attn.k_proj.weight")
3324 | # n1__613
%"rank__612"<INT64,?> ⬅️ ::Size(%"tmp__613")
3325 | # n1__612
%"int64_2__612"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3326 | # n2__612
%"int64_2_cast__612"<INT64,?> ⬅️ ::CastLike(%"int64_2__612", %"rank__612")
3327 | # n3__612
%"cond__612"<BOOL,?> ⬅️ ::Equal(%"rank__612", %"int64_2_cast__612")
3328 | # n4__612
%"t_43__611"<FLOAT16,[unk__847,unk__848]> ⬅️ ::If(%"cond__612") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__612"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__612_266
%"result__612"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.6.self_attn.k_proj.weight") {perm=[1, 0]}
return %"result__612"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__612"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__612_267
%"result_0__612"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.6.self_attn.k_proj.weight")
return %"result_0__612"<FLOAT16,[1024,4096]>
}}
3329 | # Constant_3__611
%"_val_3__611"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3330 | # n0__614
%"size_0__614"<INT64,[2]> ⬅️ ::Cast(%"_val_3__611") {to=7}
3331 | # n1__614
%"view_123__611"<FLOAT16,[unk__849,unk__850]> ⬅️ ::Reshape(%"model_layers_6_input_layernorm_1__594", %"size_0__614")
3332 | # n0__615
%"mm_43__611"<FLOAT16,[unk__849,unk__848]> ⬅️ ::MatMul(%"view_123__611", %"t_43__611")
3333 | # Constant_6__611
%"_val_6__611"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3334 | # n0__616
%"size_0__616"<INT64,[3]> ⬅️ ::Cast(%"_val_6__611") {to=7}
3335 | # n1__616
%"model_layers_6_self_attn_k_proj_1__604"<FLOAT16,[unk__851,unk__852,unk__853]> ⬅️ ::Reshape(%"mm_43__611", %"size_0__616")
3336 | # n0__619
%"tmp__619"<INT64,[2]> ⬅️ ::Shape(%"model.layers.6.self_attn.v_proj.weight")
3337 | # n1__619
%"rank__618"<INT64,?> ⬅️ ::Size(%"tmp__619")
3338 | # n1__618
%"int64_2__618"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3339 | # n2__618
%"int64_2_cast__618"<INT64,?> ⬅️ ::CastLike(%"int64_2__618", %"rank__618")
3340 | # n3__618
%"cond__618"<BOOL,?> ⬅️ ::Equal(%"rank__618", %"int64_2_cast__618")
3341 | # n4__618
%"t_44__617"<FLOAT16,[unk__854,unk__855]> ⬅️ ::If(%"cond__618") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__618"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__618_268
%"result__618"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.6.self_attn.v_proj.weight") {perm=[1, 0]}
return %"result__618"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__618"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__618_269
%"result_0__618"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.6.self_attn.v_proj.weight")
return %"result_0__618"<FLOAT16,[1024,4096]>
}}
3342 | # Constant_3__617
%"_val_3__617"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3343 | # n0__620
%"size_0__620"<INT64,[2]> ⬅️ ::Cast(%"_val_3__617") {to=7}
3344 | # n1__620
%"view_125__617"<FLOAT16,[unk__856,unk__857]> ⬅️ ::Reshape(%"model_layers_6_input_layernorm_1__594", %"size_0__620")
3345 | # n0__621
%"mm_44__617"<FLOAT16,[unk__856,unk__855]> ⬅️ ::MatMul(%"view_125__617", %"t_44__617")
3346 | # Constant_6__617
%"_val_6__617"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3347 | # n0__622
%"size_0__622"<INT64,[3]> ⬅️ ::Cast(%"_val_6__617") {to=7}
3348 | # n1__622
%"model_layers_6_self_attn_v_proj_1__604"<FLOAT16,[unk__858,unk__859,unk__860]> ⬅️ ::Reshape(%"mm_44__617", %"size_0__622")
3349 | # Constant_61__604
%"_val_8__604"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
3350 | # n0__623
%"size_0__623"<INT64,[4]> ⬅️ ::Cast(%"_val_8__604") {to=7}
3351 | # n1__623
%"view_127__604"<FLOAT16,[unk__861,unk__862,unk__863,unk__864]> ⬅️ ::Reshape(%"model_layers_6_self_attn_q_proj_1__604", %"size_0__623")
3352 | # Transpose_63__604
%"transpose_24__604"<FLOAT16,[unk__861,unk__863,unk__862,unk__864]> ⬅️ ::Transpose(%"view_127__604") {perm=[0, 2, 1, 3]}
3353 | # Constant_64__604
%"_val_11__604"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
3354 | # n0__624
%"size_0__624"<INT64,[4]> ⬅️ ::Cast(%"_val_11__604") {to=7}
3355 | # n1__624
%"view_128__604"<FLOAT16,[unk__865,unk__866,unk__867,unk__868]> ⬅️ ::Reshape(%"model_layers_6_self_attn_k_proj_1__604", %"size_0__624")
3356 | # Transpose_66__604
%"transpose_25__604"<FLOAT16,[unk__865,unk__867,unk__866,unk__868]> ⬅️ ::Transpose(%"view_128__604") {perm=[0, 2, 1, 3]}
3357 | # Constant_67__604
%"_val_14__604"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
3358 | # n0__625
%"size_0__625"<INT64,[4]> ⬅️ ::Cast(%"_val_14__604") {to=7}
3359 | # n1__625
%"view_129__604"<FLOAT16,[unk__869,unk__870,unk__871,unk__872]> ⬅️ ::Reshape(%"model_layers_6_self_attn_v_proj_1__604", %"size_0__625")
3360 | # Transpose_69__604
%"model_1_12"<FLOAT16,[1,8,128,128]> ⬅️ ::Transpose(%"view_129__604") {perm=[0, 2, 1, 3]}
3361 | # Constant_8__626
%"_val_1__626"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3362 | # Cast_9__626
%"_val_2__626"<INT64,?> ⬅️ ::Cast(%"_val_1__626") {to=7}
3363 | # Constant_10__626
%"_val_3__626"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3364 | # Reshape_11__626
%"_val_4__626"<INT64,[1]> ⬅️ ::Reshape(%"_val_2__626", %"_val_3__626") {allowzero=0}
3365 | # Constant_12__626
%"_val_5__626"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3366 | # Cast_13__626
%"_val_6__626"<INT64,?> ⬅️ ::Cast(%"_val_5__626") {to=7}
3367 | # Constant_14__626
%"_val_7__626"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3368 | # Reshape_15__626
%"_val_8__626"<INT64,[1]> ⬅️ ::Reshape(%"_val_6__626", %"_val_7__626") {allowzero=0}
3369 | # Constant_16__626
%"_val_9__626"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3370 | # Cast_17__626
%"_val_10__626"<INT64,?> ⬅️ ::Cast(%"_val_9__626") {to=7}
3371 | # Constant_18__626
%"_val_11__626"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3372 | # Reshape_19__626
%"_val_12__626"<INT64,[1]> ⬅️ ::Reshape(%"_val_10__626", %"_val_11__626") {allowzero=0}
3373 | # Constant_20__626
%"_val_13__626"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3374 | # Cast_21__626
%"_val_14__626"<INT64,?> ⬅️ ::Cast(%"_val_13__626") {to=7}
3375 | # Constant_22__626
%"_val_15__626"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3376 | # Reshape_23__626
%"_val_16__626"<INT64,[1]> ⬅️ ::Reshape(%"_val_14__626", %"_val_15__626") {allowzero=0}
3377 | # Slice_24__626
%"model_layers_6_self_attn_rotary_emb_1__604"<FLOAT16,[unk__873,unk__874]> ⬅️ ::Slice(%"model.layers.6.self_attn.rotary_emb.cos_cached", %"_val_4__626", %"_val_8__626", %"_val_12__626", %"_val_16__626")
3378 | # Constant_25__626
%"_val_19__626"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3379 | # Cast_26__626
%"_val_20__626"<INT64,?> ⬅️ ::Cast(%"_val_19__626") {to=7}
3380 | # Constant_27__626
%"_val_21__626"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3381 | # Reshape_28__626
%"_val_22__626"<INT64,[1]> ⬅️ ::Reshape(%"_val_20__626", %"_val_21__626") {allowzero=0}
3382 | # Constant_29__626
%"_val_23__626"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3383 | # Cast_30__626
%"_val_24__626"<INT64,?> ⬅️ ::Cast(%"_val_23__626") {to=7}
3384 | # Constant_31__626
%"_val_25__626"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3385 | # Reshape_32__626
%"_val_26__626"<INT64,[1]> ⬅️ ::Reshape(%"_val_24__626", %"_val_25__626") {allowzero=0}
3386 | # Constant_33__626
%"_val_27__626"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3387 | # Cast_34__626
%"_val_28__626"<INT64,?> ⬅️ ::Cast(%"_val_27__626") {to=7}
3388 | # Constant_35__626
%"_val_29__626"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3389 | # Reshape_36__626
%"_val_30__626"<INT64,[1]> ⬅️ ::Reshape(%"_val_28__626", %"_val_29__626") {allowzero=0}
3390 | # Constant_37__626
%"_val_31__626"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3391 | # Cast_38__626
%"_val_32__626"<INT64,?> ⬅️ ::Cast(%"_val_31__626") {to=7}
3392 | # Constant_39__626
%"_val_33__626"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3393 | # Reshape_40__626
%"_val_34__626"<INT64,[1]> ⬅️ ::Reshape(%"_val_32__626", %"_val_33__626") {allowzero=0}
3394 | # Slice_41__626
%"model_layers_6_self_attn_rotary_emb_1_1__604"<FLOAT16,[unk__875,unk__876]> ⬅️ ::Slice(%"model.layers.6.self_attn.rotary_emb.sin_cached", %"_val_22__626", %"_val_26__626", %"_val_30__626", %"_val_34__626")
3395 | # Transpose_71__604
%"_val_21__604"<FLOAT16,[unk__873,unk__874]> ⬅️ ::Transpose(%"model_layers_6_self_attn_rotary_emb_1__604") {perm=[0, 1]}
3396 | # Max_72__604
%"_val_22__604"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
3397 | # Shape_73__604
%"_val_23__604"<INT64,[2]> ⬅️ ::Shape(%"_val_22__604") {start=0}
3398 | # Expand_74__604
%"_val_24__604"<INT64,[unk__877,unk__878]> ⬅️ ::Expand(%"view__1", %"_val_23__604")
3399 | # Constant_75__604
%"_val_25__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3400 | # Unsqueeze_76__604
%"_val_26__604"<INT64,[unk__877,unk__878,1]> ⬅️ ::Unsqueeze(%"_val_24__604", %"_val_25__604")
3401 | # Concat_77__604
%"_val_27__604"<INT64,[unk__877,unk__878,1]> ⬅️ ::Concat(%"_val_26__604") {axis=-1}
3402 | # GatherND_78__604
%"_val_28__604"<FLOAT16,[unk__877,unk__878,unk__874]> ⬅️ ::GatherND(%"_val_21__604", %"_val_27__604") {batch_dims=0}
3403 | # Transpose_79__604
%"index_12__604"<FLOAT16,[unk__877,unk__878,unk__874]> ⬅️ ::Transpose(%"_val_28__604") {perm=[0, 1, 2]}
3404 | # n0__627
%"dim__627"<INT64,?> ⬅️ ::Constant() {value_int=1}
3405 | # n1__627
%"dim_0__627"<INT64,?> ⬅️ ::Cast(%"dim__627") {to=7}
3406 | # n2__627
%"unsqueeze_25__604"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_12__604", %"dim_0__627")
3407 | # Transpose_81__604
%"_val_31__604"<FLOAT16,[unk__875,unk__876]> ⬅️ ::Transpose(%"model_layers_6_self_attn_rotary_emb_1_1__604") {perm=[0, 1]}
3408 | # Max_82__604
%"_val_32__604"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
3409 | # Shape_83__604
%"_val_33__604"<INT64,[2]> ⬅️ ::Shape(%"_val_32__604") {start=0}
3410 | # Expand_84__604
%"_val_34__604"<INT64,[unk__879,unk__880]> ⬅️ ::Expand(%"view__1", %"_val_33__604")
3411 | # Constant_85__604
%"_val_35__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3412 | # Unsqueeze_86__604
%"_val_36__604"<INT64,[unk__879,unk__880,1]> ⬅️ ::Unsqueeze(%"_val_34__604", %"_val_35__604")
3413 | # Concat_87__604
%"_val_37__604"<INT64,[unk__879,unk__880,1]> ⬅️ ::Concat(%"_val_36__604") {axis=-1}
3414 | # GatherND_88__604
%"_val_38__604"<FLOAT16,[unk__879,unk__880,unk__876]> ⬅️ ::GatherND(%"_val_31__604", %"_val_37__604") {batch_dims=0}
3415 | # Transpose_89__604
%"index_13__604"<FLOAT16,[unk__879,unk__880,unk__876]> ⬅️ ::Transpose(%"_val_38__604") {perm=[0, 1, 2]}
3416 | # n0__628
%"dim__628"<INT64,?> ⬅️ ::Constant() {value_int=1}
3417 | # n1__628
%"dim_0__628"<INT64,?> ⬅️ ::Cast(%"dim__628") {to=7}
3418 | # n2__628
%"unsqueeze_26__604"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_13__604", %"dim_0__628")
3419 | # n0__629
%"mul_62__604"<FLOAT16,?> ⬅️ ::Mul(%"transpose_24__604", %"unsqueeze_25__604")
3420 | # Constant_92__604
%"_val_42__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3421 | # Cast_93__604
%"_val_43__604"<INT64,?> ⬅️ ::Cast(%"_val_42__604") {to=7}
3422 | # Constant_94__604
%"_val_44__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3423 | # Reshape_95__604
%"_val_45__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_43__604", %"_val_44__604") {allowzero=0}
3424 | # Constant_96__604
%"_val_46__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3425 | # Cast_97__604
%"_val_47__604"<INT64,?> ⬅️ ::Cast(%"_val_46__604") {to=7}
3426 | # Constant_98__604
%"_val_48__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3427 | # Reshape_99__604
%"_val_49__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_47__604", %"_val_48__604") {allowzero=0}
3428 | # Constant_100__604
%"_val_50__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3429 | # Cast_101__604
%"_val_51__604"<INT64,?> ⬅️ ::Cast(%"_val_50__604") {to=7}
3430 | # Constant_102__604
%"_val_52__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3431 | # Reshape_103__604
%"_val_53__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_51__604", %"_val_52__604") {allowzero=0}
3432 | # Constant_104__604
%"_val_54__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3433 | # Cast_105__604
%"_val_55__604"<INT64,?> ⬅️ ::Cast(%"_val_54__604") {to=7}
3434 | # Constant_106__604
%"_val_56__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3435 | # Reshape_107__604
%"_val_57__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_55__604", %"_val_56__604") {allowzero=0}
3436 | # Slice_108__604
%"slice_87__604"<FLOAT16,[unk__881,unk__882,unk__883,unk__884]> ⬅️ ::Slice(%"transpose_24__604", %"_val_45__604", %"_val_49__604", %"_val_53__604", %"_val_57__604")
3437 | # Constant_109__604
%"_val_59__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3438 | # Cast_110__604
%"_val_60__604"<INT64,?> ⬅️ ::Cast(%"_val_59__604") {to=7}
3439 | # Constant_111__604
%"_val_61__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3440 | # Reshape_112__604
%"_val_62__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_60__604", %"_val_61__604") {allowzero=0}
3441 | # Constant_113__604
%"_val_63__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3442 | # Cast_114__604
%"_val_64__604"<INT64,?> ⬅️ ::Cast(%"_val_63__604") {to=7}
3443 | # Constant_115__604
%"_val_65__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3444 | # Reshape_116__604
%"_val_66__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_64__604", %"_val_65__604") {allowzero=0}
3445 | # Constant_117__604
%"_val_67__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3446 | # Cast_118__604
%"_val_68__604"<INT64,?> ⬅️ ::Cast(%"_val_67__604") {to=7}
3447 | # Constant_119__604
%"_val_69__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3448 | # Reshape_120__604
%"_val_70__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_68__604", %"_val_69__604") {allowzero=0}
3449 | # Constant_121__604
%"_val_71__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3450 | # Cast_122__604
%"_val_72__604"<INT64,?> ⬅️ ::Cast(%"_val_71__604") {to=7}
3451 | # Constant_123__604
%"_val_73__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3452 | # Reshape_124__604
%"_val_74__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_72__604", %"_val_73__604") {allowzero=0}
3453 | # Slice_125__604
%"slice_88__604"<FLOAT16,[unk__885,unk__886,unk__887,unk__888]> ⬅️ ::Slice(%"transpose_24__604", %"_val_62__604", %"_val_66__604", %"_val_70__604", %"_val_74__604")
3454 | # n0__630
%"neg_12__604"<FLOAT16,[unk__885,unk__886,unk__887,unk__888]> ⬅️ ::Neg(%"slice_88__604")
3455 | # SequenceConstruct_127__604
%"77__604"<Sequence(Tensor(FLOAT16)),[unk__889,unk__890,unk__891,unk__892]> ⬅️ ::SequenceConstruct(%"neg_12__604", %"slice_87__604")
3456 | # n0__631
%"cat_12__604"<FLOAT16,[unk__889,unk__890,unk__891,unk__893]> ⬅️ ::ConcatFromSequence(%"77__604") {axis=-1}
3457 | # n0__632
%"mul_63__604"<FLOAT16,?> ⬅️ ::Mul(%"cat_12__604", %"unsqueeze_26__604")
3458 | # n0__633
%"alpha__633"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
3459 | # n1__633
%"alpha_0__633"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__633", %"mul_63__604")
3460 | # n2__633
%"other_1__633"<FLOAT16,?> ⬅️ ::Mul(%"mul_63__604", %"alpha_0__633")
3461 | # n3__633
%"add_37__604"<FLOAT16,?> ⬅️ ::Add(%"mul_62__604", %"other_1__633")
3462 | # n0__634
%"mul_64__604"<FLOAT16,?> ⬅️ ::Mul(%"transpose_25__604", %"unsqueeze_25__604")
3463 | # Constant_132__604
%"_val_82__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3464 | # Cast_133__604
%"_val_83__604"<INT64,?> ⬅️ ::Cast(%"_val_82__604") {to=7}
3465 | # Constant_134__604
%"_val_84__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3466 | # Reshape_135__604
%"_val_85__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_83__604", %"_val_84__604") {allowzero=0}
3467 | # Constant_136__604
%"_val_86__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3468 | # Cast_137__604
%"_val_87__604"<INT64,?> ⬅️ ::Cast(%"_val_86__604") {to=7}
3469 | # Constant_138__604
%"_val_88__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3470 | # Reshape_139__604
%"_val_89__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_87__604", %"_val_88__604") {allowzero=0}
3471 | # Constant_140__604
%"_val_90__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3472 | # Cast_141__604
%"_val_91__604"<INT64,?> ⬅️ ::Cast(%"_val_90__604") {to=7}
3473 | # Constant_142__604
%"_val_92__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3474 | # Reshape_143__604
%"_val_93__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_91__604", %"_val_92__604") {allowzero=0}
3475 | # Constant_144__604
%"_val_94__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3476 | # Cast_145__604
%"_val_95__604"<INT64,?> ⬅️ ::Cast(%"_val_94__604") {to=7}
3477 | # Constant_146__604
%"_val_96__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3478 | # Reshape_147__604
%"_val_97__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_95__604", %"_val_96__604") {allowzero=0}
3479 | # Slice_148__604
%"slice_89__604"<FLOAT16,[unk__894,unk__895,unk__896,unk__897]> ⬅️ ::Slice(%"transpose_25__604", %"_val_85__604", %"_val_89__604", %"_val_93__604", %"_val_97__604")
3480 | # Constant_149__604
%"_val_99__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3481 | # Cast_150__604
%"_val_100__604"<INT64,?> ⬅️ ::Cast(%"_val_99__604") {to=7}
3482 | # Constant_151__604
%"_val_101__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3483 | # Reshape_152__604
%"_val_102__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_100__604", %"_val_101__604") {allowzero=0}
3484 | # Constant_153__604
%"_val_103__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3485 | # Cast_154__604
%"_val_104__604"<INT64,?> ⬅️ ::Cast(%"_val_103__604") {to=7}
3486 | # Constant_155__604
%"_val_105__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3487 | # Reshape_156__604
%"_val_106__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_104__604", %"_val_105__604") {allowzero=0}
3488 | # Constant_157__604
%"_val_107__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3489 | # Cast_158__604
%"_val_108__604"<INT64,?> ⬅️ ::Cast(%"_val_107__604") {to=7}
3490 | # Constant_159__604
%"_val_109__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3491 | # Reshape_160__604
%"_val_110__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_108__604", %"_val_109__604") {allowzero=0}
3492 | # Constant_161__604
%"_val_111__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3493 | # Cast_162__604
%"_val_112__604"<INT64,?> ⬅️ ::Cast(%"_val_111__604") {to=7}
3494 | # Constant_163__604
%"_val_113__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3495 | # Reshape_164__604
%"_val_114__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_112__604", %"_val_113__604") {allowzero=0}
3496 | # Slice_165__604
%"slice_90__604"<FLOAT16,[unk__898,unk__899,unk__900,unk__901]> ⬅️ ::Slice(%"transpose_25__604", %"_val_102__604", %"_val_106__604", %"_val_110__604", %"_val_114__604")
3497 | # n0__635
%"neg_13__604"<FLOAT16,[unk__898,unk__899,unk__900,unk__901]> ⬅️ ::Neg(%"slice_90__604")
3498 | # SequenceConstruct_167__604
%"117__604"<Sequence(Tensor(FLOAT16)),[unk__902,unk__903,unk__904,unk__905]> ⬅️ ::SequenceConstruct(%"neg_13__604", %"slice_89__604")
3499 | # n0__636
%"cat_13__604"<FLOAT16,[unk__902,unk__903,unk__904,unk__906]> ⬅️ ::ConcatFromSequence(%"117__604") {axis=-1}
3500 | # n0__637
%"mul_65__604"<FLOAT16,?> ⬅️ ::Mul(%"cat_13__604", %"unsqueeze_26__604")
3501 | # n0__638
%"alpha__638"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
3502 | # n1__638
%"alpha_0__638"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__638", %"mul_65__604")
3503 | # n2__638
%"other_1__638"<FLOAT16,?> ⬅️ ::Mul(%"mul_65__604", %"alpha_0__638")
3504 | # n3__638
%"model_1_13"<FLOAT16,[1,8,128,128]> ⬅️ ::Add(%"mul_64__604", %"other_1__638")
3505 | # Constant_171__604
%"_val_121__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3506 | # Cast_172__604
%"_val_122__604"<INT64,?> ⬅️ ::Cast(%"_val_121__604") {to=7}
3507 | # Constant_173__604
%"_val_123__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3508 | # Reshape_174__604
%"_val_124__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_122__604", %"_val_123__604") {allowzero=0}
3509 | # Constant_175__604
%"_val_125__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3510 | # Cast_176__604
%"_val_126__604"<INT64,?> ⬅️ ::Cast(%"_val_125__604") {to=7}
3511 | # Constant_177__604
%"_val_127__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3512 | # Reshape_178__604
%"_val_128__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_126__604", %"_val_127__604") {allowzero=0}
3513 | # Constant_179__604
%"_val_129__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3514 | # Cast_180__604
%"_val_130__604"<INT64,?> ⬅️ ::Cast(%"_val_129__604") {to=7}
3515 | # Constant_181__604
%"_val_131__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3516 | # Reshape_182__604
%"_val_132__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_130__604", %"_val_131__604") {allowzero=0}
3517 | # Constant_183__604
%"_val_133__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3518 | # Cast_184__604
%"_val_134__604"<INT64,?> ⬅️ ::Cast(%"_val_133__604") {to=7}
3519 | # Constant_185__604
%"_val_135__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3520 | # Reshape_186__604
%"_val_136__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_134__604", %"_val_135__604") {allowzero=0}
3521 | # Slice_187__604
%"slice_91__604"<FLOAT16,[unk__907,unk__908,unk__909,unk__910]> ⬅️ ::Slice(%"model_1_13", %"_val_124__604", %"_val_128__604", %"_val_132__604", %"_val_136__604")
3522 | # Constant_188__604
%"_val_138__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3523 | # Cast_189__604
%"_val_139__604"<INT64,?> ⬅️ ::Cast(%"_val_138__604") {to=7}
3524 | # Constant_190__604
%"_val_140__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3525 | # Reshape_191__604
%"_val_141__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_139__604", %"_val_140__604") {allowzero=0}
3526 | # Constant_192__604
%"_val_142__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3527 | # Cast_193__604
%"_val_143__604"<INT64,?> ⬅️ ::Cast(%"_val_142__604") {to=7}
3528 | # Constant_194__604
%"_val_144__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3529 | # Reshape_195__604
%"_val_145__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_143__604", %"_val_144__604") {allowzero=0}
3530 | # Constant_196__604
%"_val_146__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3531 | # Cast_197__604
%"_val_147__604"<INT64,?> ⬅️ ::Cast(%"_val_146__604") {to=7}
3532 | # Constant_198__604
%"_val_148__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3533 | # Reshape_199__604
%"_val_149__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_147__604", %"_val_148__604") {allowzero=0}
3534 | # Constant_200__604
%"_val_150__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3535 | # Cast_201__604
%"_val_151__604"<INT64,?> ⬅️ ::Cast(%"_val_150__604") {to=7}
3536 | # Constant_202__604
%"_val_152__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3537 | # Reshape_203__604
%"_val_153__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_151__604", %"_val_152__604") {allowzero=0}
3538 | # Slice_204__604
%"slice_92__604"<FLOAT16,[unk__911,unk__912,unk__913,unk__914]> ⬅️ ::Slice(%"slice_91__604", %"_val_141__604", %"_val_145__604", %"_val_149__604", %"_val_153__604")
3539 | # n0__639
%"dim__639"<INT64,?> ⬅️ ::Constant() {value_int=2}
3540 | # n1__639
%"dim_0__639"<INT64,?> ⬅️ ::Cast(%"dim__639") {to=7}
3541 | # n2__639
%"unsqueeze_27__604"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_92__604", %"dim_0__639")
3542 | # Constant_206__604
%"_val_156__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3543 | # Cast_207__604
%"_val_157__604"<INT64,?> ⬅️ ::Cast(%"_val_156__604") {to=7}
3544 | # Constant_208__604
%"_val_158__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3545 | # Reshape_209__604
%"_val_159__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_157__604", %"_val_158__604") {allowzero=0}
3546 | # Constant_210__604
%"_val_160__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3547 | # Cast_211__604
%"_val_161__604"<INT64,?> ⬅️ ::Cast(%"_val_160__604") {to=7}
3548 | # Constant_212__604
%"_val_162__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3549 | # Reshape_213__604
%"_val_163__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_161__604", %"_val_162__604") {allowzero=0}
3550 | # Constant_214__604
%"_val_164__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3551 | # Cast_215__604
%"_val_165__604"<INT64,?> ⬅️ ::Cast(%"_val_164__604") {to=7}
3552 | # Constant_216__604
%"_val_166__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3553 | # Reshape_217__604
%"_val_167__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_165__604", %"_val_166__604") {allowzero=0}
3554 | # Constant_218__604
%"_val_168__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3555 | # Cast_219__604
%"_val_169__604"<INT64,?> ⬅️ ::Cast(%"_val_168__604") {to=7}
3556 | # Constant_220__604
%"_val_170__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3557 | # Reshape_221__604
%"_val_171__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_169__604", %"_val_170__604") {allowzero=0}
3558 | # Slice_222__604
%"slice_93__604"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_27__604", %"_val_159__604", %"_val_163__604", %"_val_167__604", %"_val_171__604")
3559 | # Constant_223__604
%"_val_173__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3560 | # Cast_224__604
%"_val_174__604"<INT64,?> ⬅️ ::Cast(%"_val_173__604") {to=7}
3561 | # Constant_225__604
%"_val_175__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3562 | # Reshape_226__604
%"_val_176__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_174__604", %"_val_175__604") {allowzero=0}
3563 | # Constant_227__604
%"_val_177__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3564 | # Cast_228__604
%"_val_178__604"<INT64,?> ⬅️ ::Cast(%"_val_177__604") {to=7}
3565 | # Constant_229__604
%"_val_179__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3566 | # Reshape_230__604
%"_val_180__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_178__604", %"_val_179__604") {allowzero=0}
3567 | # Constant_231__604
%"_val_181__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3568 | # Cast_232__604
%"_val_182__604"<INT64,?> ⬅️ ::Cast(%"_val_181__604") {to=7}
3569 | # Constant_233__604
%"_val_183__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3570 | # Reshape_234__604
%"_val_184__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_182__604", %"_val_183__604") {allowzero=0}
3571 | # Constant_235__604
%"_val_185__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3572 | # Cast_236__604
%"_val_186__604"<INT64,?> ⬅️ ::Cast(%"_val_185__604") {to=7}
3573 | # Constant_237__604
%"_val_187__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3574 | # Reshape_238__604
%"_val_188__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_186__604", %"_val_187__604") {allowzero=0}
3575 | # Slice_239__604
%"slice_94__604"<FLOAT16,?> ⬅️ ::Slice(%"slice_93__604", %"_val_176__604", %"_val_180__604", %"_val_184__604", %"_val_188__604")
3576 | # Constant_240__604
%"_val_190__604"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
3577 | # n0__640
%"size_0__640"<INT64,[5]> ⬅️ ::Cast(%"_val_190__604") {to=7}
3578 | # n1__640
%"size_1__640"<INT64,[5]> ⬅️ ::Abs(%"size_0__640")
3579 | # n2__640
%"expand_12__604"<FLOAT16,?> ⬅️ ::Expand(%"slice_94__604", %"size_1__640")
3580 | # n0__641
%"clone_12__604"<FLOAT16,?> ⬅️ ::Identity(%"expand_12__604")
3581 | # Constant_243__604
%"_val_193__604"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
3582 | # n0__642
%"size_0__642"<INT64,[4]> ⬅️ ::Cast(%"_val_193__604") {to=7}
3583 | # n1__642
%"view_130__604"<FLOAT16,[unk__915,unk__916,unk__917,unk__918]> ⬅️ ::Reshape(%"clone_12__604", %"size_0__642")
3584 | # Constant_245__604
%"_val_195__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3585 | # Cast_246__604
%"_val_196__604"<INT64,?> ⬅️ ::Cast(%"_val_195__604") {to=7}
3586 | # Constant_247__604
%"_val_197__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3587 | # Reshape_248__604
%"_val_198__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_196__604", %"_val_197__604") {allowzero=0}
3588 | # Constant_249__604
%"_val_199__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3589 | # Cast_250__604
%"_val_200__604"<INT64,?> ⬅️ ::Cast(%"_val_199__604") {to=7}
3590 | # Constant_251__604
%"_val_201__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3591 | # Reshape_252__604
%"_val_202__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_200__604", %"_val_201__604") {allowzero=0}
3592 | # Constant_253__604
%"_val_203__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3593 | # Cast_254__604
%"_val_204__604"<INT64,?> ⬅️ ::Cast(%"_val_203__604") {to=7}
3594 | # Constant_255__604
%"_val_205__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3595 | # Reshape_256__604
%"_val_206__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_204__604", %"_val_205__604") {allowzero=0}
3596 | # Constant_257__604
%"_val_207__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3597 | # Cast_258__604
%"_val_208__604"<INT64,?> ⬅️ ::Cast(%"_val_207__604") {to=7}
3598 | # Constant_259__604
%"_val_209__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3599 | # Reshape_260__604
%"_val_210__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_208__604", %"_val_209__604") {allowzero=0}
3600 | # Slice_261__604
%"slice_95__604"<FLOAT16,[unk__919,unk__920,unk__921,unk__922]> ⬅️ ::Slice(%"model_1_12", %"_val_198__604", %"_val_202__604", %"_val_206__604", %"_val_210__604")
3601 | # Constant_262__604
%"_val_212__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3602 | # Cast_263__604
%"_val_213__604"<INT64,?> ⬅️ ::Cast(%"_val_212__604") {to=7}
3603 | # Constant_264__604
%"_val_214__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3604 | # Reshape_265__604
%"_val_215__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_213__604", %"_val_214__604") {allowzero=0}
3605 | # Constant_266__604
%"_val_216__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3606 | # Cast_267__604
%"_val_217__604"<INT64,?> ⬅️ ::Cast(%"_val_216__604") {to=7}
3607 | # Constant_268__604
%"_val_218__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3608 | # Reshape_269__604
%"_val_219__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_217__604", %"_val_218__604") {allowzero=0}
3609 | # Constant_270__604
%"_val_220__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3610 | # Cast_271__604
%"_val_221__604"<INT64,?> ⬅️ ::Cast(%"_val_220__604") {to=7}
3611 | # Constant_272__604
%"_val_222__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3612 | # Reshape_273__604
%"_val_223__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_221__604", %"_val_222__604") {allowzero=0}
3613 | # Constant_274__604
%"_val_224__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3614 | # Cast_275__604
%"_val_225__604"<INT64,?> ⬅️ ::Cast(%"_val_224__604") {to=7}
3615 | # Constant_276__604
%"_val_226__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3616 | # Reshape_277__604
%"_val_227__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_225__604", %"_val_226__604") {allowzero=0}
3617 | # Slice_278__604
%"slice_96__604"<FLOAT16,[unk__923,unk__924,unk__925,unk__926]> ⬅️ ::Slice(%"slice_95__604", %"_val_215__604", %"_val_219__604", %"_val_223__604", %"_val_227__604")
3618 | # n0__643
%"dim__643"<INT64,?> ⬅️ ::Constant() {value_int=2}
3619 | # n1__643
%"dim_0__643"<INT64,?> ⬅️ ::Cast(%"dim__643") {to=7}
3620 | # n2__643
%"unsqueeze_28__604"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_96__604", %"dim_0__643")
3621 | # Constant_280__604
%"_val_230__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3622 | # Cast_281__604
%"_val_231__604"<INT64,?> ⬅️ ::Cast(%"_val_230__604") {to=7}
3623 | # Constant_282__604
%"_val_232__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3624 | # Reshape_283__604
%"_val_233__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_231__604", %"_val_232__604") {allowzero=0}
3625 | # Constant_284__604
%"_val_234__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3626 | # Cast_285__604
%"_val_235__604"<INT64,?> ⬅️ ::Cast(%"_val_234__604") {to=7}
3627 | # Constant_286__604
%"_val_236__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3628 | # Reshape_287__604
%"_val_237__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_235__604", %"_val_236__604") {allowzero=0}
3629 | # Constant_288__604
%"_val_238__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3630 | # Cast_289__604
%"_val_239__604"<INT64,?> ⬅️ ::Cast(%"_val_238__604") {to=7}
3631 | # Constant_290__604
%"_val_240__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3632 | # Reshape_291__604
%"_val_241__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_239__604", %"_val_240__604") {allowzero=0}
3633 | # Constant_292__604
%"_val_242__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3634 | # Cast_293__604
%"_val_243__604"<INT64,?> ⬅️ ::Cast(%"_val_242__604") {to=7}
3635 | # Constant_294__604
%"_val_244__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3636 | # Reshape_295__604
%"_val_245__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_243__604", %"_val_244__604") {allowzero=0}
3637 | # Slice_296__604
%"slice_97__604"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_28__604", %"_val_233__604", %"_val_237__604", %"_val_241__604", %"_val_245__604")
3638 | # Constant_297__604
%"_val_247__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3639 | # Cast_298__604
%"_val_248__604"<INT64,?> ⬅️ ::Cast(%"_val_247__604") {to=7}
3640 | # Constant_299__604
%"_val_249__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3641 | # Reshape_300__604
%"_val_250__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_248__604", %"_val_249__604") {allowzero=0}
3642 | # Constant_301__604
%"_val_251__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3643 | # Cast_302__604
%"_val_252__604"<INT64,?> ⬅️ ::Cast(%"_val_251__604") {to=7}
3644 | # Constant_303__604
%"_val_253__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3645 | # Reshape_304__604
%"_val_254__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_252__604", %"_val_253__604") {allowzero=0}
3646 | # Constant_305__604
%"_val_255__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3647 | # Cast_306__604
%"_val_256__604"<INT64,?> ⬅️ ::Cast(%"_val_255__604") {to=7}
3648 | # Constant_307__604
%"_val_257__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3649 | # Reshape_308__604
%"_val_258__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_256__604", %"_val_257__604") {allowzero=0}
3650 | # Constant_309__604
%"_val_259__604"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3651 | # Cast_310__604
%"_val_260__604"<INT64,?> ⬅️ ::Cast(%"_val_259__604") {to=7}
3652 | # Constant_311__604
%"_val_261__604"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3653 | # Reshape_312__604
%"_val_262__604"<INT64,[1]> ⬅️ ::Reshape(%"_val_260__604", %"_val_261__604") {allowzero=0}
3654 | # Slice_313__604
%"slice_98__604"<FLOAT16,?> ⬅️ ::Slice(%"slice_97__604", %"_val_250__604", %"_val_254__604", %"_val_258__604", %"_val_262__604")
3655 | # Constant_314__604
%"_val_264__604"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
3656 | # n0__644
%"size_0__644"<INT64,[5]> ⬅️ ::Cast(%"_val_264__604") {to=7}
3657 | # n1__644
%"size_1__644"<INT64,[5]> ⬅️ ::Abs(%"size_0__644")
3658 | # n2__644
%"expand_13__604"<FLOAT16,?> ⬅️ ::Expand(%"slice_98__604", %"size_1__644")
3659 | # n0__645
%"clone_13__604"<FLOAT16,?> ⬅️ ::Identity(%"expand_13__604")
3660 | # Constant_317__604
%"_val_267__604"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
3661 | # n0__646
%"size_0__646"<INT64,[4]> ⬅️ ::Cast(%"_val_267__604") {to=7}
3662 | # n1__646
%"view_131__604"<FLOAT16,[unk__927,unk__928,unk__929,unk__930]> ⬅️ ::Reshape(%"clone_13__604", %"size_0__646")
3663 | # n0__647
%"tmp__647"<INT64,[unk__931]> ⬅️ ::Shape(%"add_37__604")
3664 | # n1__647
%"int64_m1__647"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_m1')}
3665 | # n2__647
%"tmp_subscripted__647"<INT64,?> ⬅️ ::Gather(%"tmp__647", %"int64_m1__647") {axis=0}
3666 | # n3__647
%"embedding_size__647"<FLOAT16,?> ⬅️ ::CastLike(%"tmp_subscripted__647", %"add_37__604")
3667 | # n4__647
%"const__647"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
3668 | # n5__647
%"tmp_0__647"<FLOAT16,?> ⬅️ ::Sqrt(%"embedding_size__647")
3669 | # n6__647
%"const_cast__647"<FLOAT16,?> ⬅️ ::CastLike(%"const__647", %"tmp_0__647")
3670 | # n7__647
%"_val_269__604"<FLOAT16,?> ⬅️ ::Div(%"const_cast__647", %"tmp_0__647")
3671 | # CastLike_320__604
%"_val_270__604"<FLOAT16,?> ⬅️ ::CastLike(%"_val_269__604", %"add_37__604")
3672 | # n0__648
%"tmp__648"<INT64,[unk__932]> ⬅️ ::Shape(%"add_37__604")
3673 | # n1__648
%"int64_0_1d__648"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
3674 | # n2__648
%"int64_1_1d__648"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
3675 | # n3__648
%"int64_m2_1d__648"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
3676 | # n4__648
%"int64_m1_1d__648"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
3677 | # n5__648
%"target_length__648"<INT64,[unk__933]> ⬅️ ::Slice(%"tmp__648", %"int64_m2_1d__648", %"int64_m1_1d__648", %"int64_0_1d__648", %"int64_1_1d__648")
3678 | # n6__648
%"tmp_0__648"<INT64,[4]> ⬅️ ::Shape(%"view_130__604")
3679 | # n7__648
%"int64_0_1d_1__648"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
3680 | # n8__648
%"int64_1_1d_2__648"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
3681 | # n9__648
%"int64_m2_1d_3__648"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_3')}
3682 | # n10__648
%"int64_m1_1d_4__648"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_4')}
3683 | # n11__648
%"source_length__648"<INT64,[1]> ⬅️ ::Slice(%"tmp_0__648", %"int64_m2_1d_3__648", %"int64_m1_1d_4__648", %"int64_0_1d_1__648", %"int64_1_1d_2__648")
3684 | # n12__648
%"size__648"<INT64,[unk__934]> ⬅️ ::Concat(%"target_length__648", %"source_length__648") {axis=0}
3685 | # n13__648
%"const__648"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
3686 | # n14__648
%"attn_mask__648"<FLOAT,?> ⬅️ ::Expand(%"const__648", %"size__648")
3687 | # n15__648
%"attn_mask_5__648"<FLOAT,?> ⬅️ ::Trilu(%"attn_mask__648") {upper=0}
3688 | # n16__648
%"const_6__648"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_6')}
3689 | # n17__648
%"const_6_cast__648"<FLOAT,?> ⬅️ ::CastLike(%"const_6__648", %"attn_mask_5__648")
3690 | # n18__648
%"tmp_7__648"<BOOL,?> ⬅️ ::Equal(%"attn_mask_5__648", %"const_6_cast__648")
3691 | # n19__648
%"tmp_8__648"<FLOAT,?> ⬅️ ::Constant() {value_float=-inf}
3692 | # n20__648
%"const_9__648"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
3693 | # n21__648
%"const_9_cast__648"<FLOAT,?> ⬅️ ::CastLike(%"const_9__648", %"tmp_8__648")
3694 | # n22__648
%"attn_mask_10__648"<FLOAT,?> ⬅️ ::Where(%"tmp_7__648", %"tmp_8__648", %"const_9_cast__648")
3695 | # n23__648
%"_val_271__604"<FLOAT16,?> ⬅️ ::CastLike(%"attn_mask_10__648", %"add_37__604")
3696 | # n0__649
%"key_shape__649"<INT64,[4]> ⬅️ ::Shape(%"view_130__604")
3697 | # n1__649
%"int64_0_1d__649"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
3698 | # n2__649
%"int64_1_1d__649"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
3699 | # n3__649
%"int64_m1_1d__649"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
3700 | # n4__649
%"int64_9223372036854775807_1d__649"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_9223372036854775807_1d')}
3701 | # n5__649
%"key_last_dim__649"<INT64,[1]> ⬅️ ::Slice(%"key_shape__649", %"int64_m1_1d__649", %"int64_9223372036854775807_1d__649", %"int64_0_1d__649", %"int64_1_1d__649")
3702 | # n6__649
%"int64_0_1d_0__649"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_0')}
3703 | # n7__649
%"int64_1_1d_1__649"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_1')}
3704 | # n8__649
%"int64_m2_1d__649"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
3705 | # n9__649
%"int64_m1_1d_2__649"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_2')}
3706 | # n10__649
%"key_second_last_dim__649"<INT64,[1]> ⬅️ ::Slice(%"key_shape__649", %"int64_m2_1d__649", %"int64_m1_1d_2__649", %"int64_0_1d_0__649", %"int64_1_1d_1__649")
3707 | # n11__649
%"int64_0_1d_3__649"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
3708 | # n12__649
%"int64_1_1d_4__649"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
3709 | # n13__649
%"int64_m2_1d_5__649"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_5')}
3710 | # n14__649
%"key_first_dims__649"<INT64,[2]> ⬅️ ::Slice(%"key_shape__649", %"int64_0_1d_3__649", %"int64_m2_1d_5__649", %"int64_0_1d_3__649", %"int64_1_1d_4__649")
3711 | # n15__649
%"tmp__649"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3712 | # n16__649
%"key_squeezed_shape__649"<INT64,[3]> ⬅️ ::Concat(%"tmp__649", %"key_second_last_dim__649", %"key_last_dim__649") {axis=0}
3713 | # n17__649
%"key_squeezed__649"<FLOAT16,[unk__935,unk__936,unk__937]> ⬅️ ::Reshape(%"view_130__604", %"key_squeezed_shape__649")
3714 | # n18__649
%"key_squeezed_transposed__649"<FLOAT16,[unk__935,unk__937,unk__936]> ⬅️ ::Transpose(%"key_squeezed__649") {perm=[0, 2, 1]}
3715 | # n19__649
%"key_transposed_shape__649"<INT64,[4]> ⬅️ ::Concat(%"key_first_dims__649", %"key_last_dim__649", %"key_second_last_dim__649") {axis=0}
3716 | # n20__649
%"key_transposed__649"<FLOAT16,[unk__938,unk__939,unk__940,unk__941]> ⬅️ ::Reshape(%"key_squeezed_transposed__649", %"key_transposed_shape__649")
3717 | # n21__649
%"tmp_6__649"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__604")
3718 | # n22__649
%"query_scaled__649"<FLOAT16,?> ⬅️ ::Mul(%"add_37__604", %"tmp_6__649")
3719 | # n23__649
%"tmp_7__649"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__604")
3720 | # n24__649
%"key_transposed_scaled__649"<FLOAT16,[unk__938,unk__939,unk__940,unk__941]> ⬅️ ::Mul(%"key_transposed__649", %"tmp_7__649")
3721 | # n25__649
%"tmp_8__649"<FLOAT16,?> ⬅️ ::MatMul(%"query_scaled__649", %"key_transposed_scaled__649")
3722 | # n26__649
%"tmp_9__649"<FLOAT16,?> ⬅️ ::Add(%"tmp_8__649", %"_val_271__604")
3723 | # n27__649
%"attn_weight__649"<FLOAT16,?> ⬅️ ::Softmax(%"tmp_9__649") {axis=-1}
3724 | # n28__649
%"dropout_p__649"<FLOAT,?> ⬅️ ::Constant() {value_float=0.0}
3725 | # n29__649
%"attn_weight_10__649"<FLOAT16,?>, %"___649"<BOOL,?> ⬅️ ::Dropout(%"attn_weight__649", %"dropout_p__649")
3726 | # n30__649
%"_scaled_dot_product_efficient_attention_6__604"<FLOAT16,?> ⬅️ ::MatMul(%"attn_weight_10__649", %"view_131__604")
3727 | # n0__650
%"query_0__650"<FLOAT16,?> ⬅️ ::Transpose(%"add_37__604") {perm=[0, 2, 1, 3]}
3728 | # n1__650
%"query_shape__650"<INT64,[unk__942]> ⬅️ ::Shape(%"query_0__650")
3729 | # n2__650
%"int64_0_1d__650"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
3730 | # n3__650
%"int64_1_1d__650"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
3731 | # n4__650
%"query_first_dims__650"<INT64,[unk__943]> ⬅️ ::Slice(%"query_shape__650", %"int64_0_1d__650", %"int64_1_1d__650", %"int64_0_1d__650", %"int64_1_1d__650")
3732 | # n5__650
%"int64_0_1d_1__650"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
3733 | # n6__650
%"int64_1_1d_2__650"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
3734 | # n7__650
%"int64_2_1d__650"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_2_1d')}
3735 | # n8__650
%"query_second_dims__650"<INT64,[unk__944]> ⬅️ ::Slice(%"query_shape__650", %"int64_1_1d_2__650", %"int64_2_1d__650", %"int64_0_1d_1__650", %"int64_1_1d_2__650")
3736 | # n9__650
%"int64_0_1d_3__650"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
3737 | # n10__650
%"int64_1_1d_4__650"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
3738 | # n11__650
%"int64_m2_1d__650"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
3739 | # n12__650
%"int64_m1_1d__650"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
3740 | # n13__650
%"num_heads__650"<INT64,[unk__945]> ⬅️ ::Slice(%"query_shape__650", %"int64_m2_1d__650", %"int64_m1_1d__650", %"int64_0_1d_3__650", %"int64_1_1d_4__650")
3741 | # n14__650
%"compute_log_sumexp__650"<INT64,?> ⬅️ ::Constant() {value_int=0}
3742 | # n15__650
%"compute_log_sumexp_as_bool__650"<BOOL,?> ⬅️ ::Cast(%"compute_log_sumexp__650") {to=9}
3743 | # n16__650
%"_scaled_dot_product_efficient_attention_6_1__604"<FLOAT,?> ⬅️ ::If(%"compute_log_sumexp_as_bool__650") {then_branch=
graph(
name=thenGraph_14,
inputs=(
),
outputs=(
%"logsum_exp__650"<FLOAT,?>
),
) {
0 | # n0__650_270
%"tmp__650"<FLOAT,[unk__944]> ⬅️ ::Cast(%"query_second_dims__650") {to=1}
1 | # n1__650_271
%"const__650"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2 | # n2__650_272
%"const_cast__650"<FLOAT,?> ⬅️ ::CastLike(%"const__650", %"tmp__650")
3 | # n3__650_273
%"tmp_5__650"<FLOAT,[unk__944]> ⬅️ ::Div(%"tmp__650", %"const_cast__650")
4 | # n4__650_274
%"tmp_6__650"<FLOAT,[unk__944]> ⬅️ ::Ceil(%"tmp_5__650")
5 | # n5__650_275
%"const_7__650"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_7')}
6 | # n6__650_276
%"const_7_cast__650"<FLOAT,?> ⬅️ ::CastLike(%"const_7__650", %"tmp_6__650")
7 | # n7__650_277
%"tmp_8__650"<FLOAT,[unk__944]> ⬅️ ::Mul(%"tmp_6__650", %"const_7_cast__650")
8 | # n8__650_278
%"logsumexp_dim__650"<INT64,[unk__944]> ⬅️ ::Cast(%"tmp_8__650") {to=7}
9 | # n9__650_279
%"const_9__650"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
10 | # n10__650_280
%"tmp_10__650"<INT64,[unk__946]> ⬅️ ::Concat(%"query_first_dims__650", %"num_heads__650", %"logsumexp_dim__650") {axis=0}
11 | # n11__650_281
%"logsum_exp__650"<FLOAT,?> ⬅️ ::Expand(%"const_9__650", %"tmp_10__650")
return %"logsum_exp__650"<FLOAT,?>
}, else_branch=
graph(
name=elseGraph_14,
inputs=(
),
outputs=(
%"logsum_exp_14__650"<FLOAT,?>
),
) {
0 | # n0__650_282
%"const_11__650"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_11')}
1 | # n1__650_283
%"int64_0_1d_12__650"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_12')}
2 | # n2__650_284
%"int64_0_1d_12_cast__650"<INT64,[1]> ⬅️ ::CastLike(%"int64_0_1d_12__650", %"num_heads__650")
3 | # n3__650_285
%"tmp_13__650"<INT64,[unk__947]> ⬅️ ::Concat(%"query_first_dims__650", %"num_heads__650", %"int64_0_1d_12_cast__650") {axis=0}
4 | # n4__650_286
%"logsum_exp_14__650"<FLOAT,?> ⬅️ ::Expand(%"const_11__650", %"tmp_13__650")
return %"logsum_exp_14__650"<FLOAT,?>
}}
3744 | # n17__650
%"tmp_16__650"<INT64,[0]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[0]>('Empty_INTS')}
3745 | # n18__650
%"tmp_17__650"<FLOAT,?> ⬅️ ::ConstantOfShape(%"tmp_16__650")
3746 | # n19__650
%"_scaled_dot_product_efficient_attention_6_3__604"<INT64,?> ⬅️ ::Cast(%"tmp_17__650") {to=7}
3747 | # Transpose_324__604
%"transpose_27__604"<FLOAT16,?> ⬅️ ::Transpose(%"_scaled_dot_product_efficient_attention_6__604") {perm=[0, 2, 1, 3]}
3748 | # Constant_325__604
%"_val_276__604"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3749 | # n0__651
%"size_0__651"<INT64,[3]> ⬅️ ::Cast(%"_val_276__604") {to=7}
3750 | # n1__651
%"view_132__604"<FLOAT16,[unk__948,unk__949,unk__950]> ⬅️ ::Reshape(%"transpose_27__604", %"size_0__651")
3751 | # n0__654
%"tmp__654"<INT64,[2]> ⬅️ ::Shape(%"model.layers.6.self_attn.o_proj.weight")
3752 | # n1__654
%"rank__653"<INT64,?> ⬅️ ::Size(%"tmp__654")
3753 | # n1__653
%"int64_2__653"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3754 | # n2__653
%"int64_2_cast__653"<INT64,?> ⬅️ ::CastLike(%"int64_2__653", %"rank__653")
3755 | # n3__653
%"cond__653"<BOOL,?> ⬅️ ::Equal(%"rank__653", %"int64_2_cast__653")
3756 | # n4__653
%"t_45__652"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__653") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__653"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__653_287
%"result__653"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.6.self_attn.o_proj.weight") {perm=[1, 0]}
return %"result__653"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__653"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__653_288
%"result_0__653"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.6.self_attn.o_proj.weight")
return %"result_0__653"<FLOAT16,[4096,4096]>
}}
3757 | # Constant_3__652
%"_val_3__652"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3758 | # n0__655
%"size_0__655"<INT64,[2]> ⬅️ ::Cast(%"_val_3__652") {to=7}
3759 | # n1__655
%"view_133__652"<FLOAT16,[unk__951,unk__952]> ⬅️ ::Reshape(%"view_132__604", %"size_0__655")
3760 | # n0__656
%"mm_45__652"<FLOAT16,[unk__951,4096]> ⬅️ ::MatMul(%"view_133__652", %"t_45__652")
3761 | # Constant_6__652
%"_val_6__652"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3762 | # n0__657
%"size_0__657"<INT64,[3]> ⬅️ ::Cast(%"_val_6__652") {to=7}
3763 | # n1__657
%"model_layers_6_self_attn_1_2__594"<FLOAT16,[unk__953,unk__954,unk__955]> ⬅️ ::Reshape(%"mm_45__652", %"size_0__657")
3764 | # n0__658
%"alpha__658"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
3765 | # n1__658
%"alpha_0__658"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__658", %"model_layers_6_self_attn_1_2__594")
3766 | # n2__658
%"other_1__658"<FLOAT16,[unk__953,unk__954,unk__955]> ⬅️ ::Mul(%"model_layers_6_self_attn_1_2__594", %"alpha_0__658")
3767 | # n3__658
%"add_39__594"<FLOAT16,[unk__956,128,4096]> ⬅️ ::Add(%"model_layers_5_1_2__1", %"other_1__658")
3768 | # Cast_3__659
%"_to_copy_38__659"<FLOAT,[unk__956,128,4096]> ⬅️ ::Cast(%"add_39__594") {to=1}
3769 | # Constant_4__659
%"_val_2__659"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3770 | # Cast_5__659
%"scalar_tensor_default_13__659"<FLOAT,?> ⬅️ ::Cast(%"_val_2__659") {to=1}
3771 | # n0__660
%"pow_14__659"<FLOAT,[unk__956,128,4096]> ⬅️ ::Pow(%"_to_copy_38__659", %"scalar_tensor_default_13__659")
3772 | # Constant_7__659
%"_val_5__659"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
3773 | # n0__662
%"tmp__662"<INT64,[3]> ⬅️ ::Shape(%"pow_14__659")
3774 | # n1__662
%"tmp_0__662"<INT64,?> ⬅️ ::Size(%"tmp__662")
3775 | # n2__662
%"tmp_1__662"<INT64,?> ⬅️ ::Constant() {value_int=0}
3776 | # n3__662
%"cond__661"<BOOL,?> ⬅️ ::Equal(%"tmp_0__662", %"tmp_1__662")
3777 | # n1__661
%"mean_13__659"<FLOAT,?> ⬅️ ::If(%"cond__661") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__661"<FLOAT,[unk__956,128,4096]>
),
) {
0 | # n0__661_289
%"result__661"<FLOAT,[unk__956,128,4096]> ⬅️ ::Identity(%"pow_14__659")
return %"result__661"<FLOAT,[unk__956,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__661"<FLOAT,?>
),
) {
0 | # n0__663
%"tmp__663"<INT64,[1]> ⬅️ ::Shape(%"_val_5__659")
1 | # n1__663
%"tmp_0__663"<INT64,?> ⬅️ ::Size(%"tmp__663")
2 | # n2__663
%"tmp_1__663"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__663
%"cond_0__661"<BOOL,?> ⬅️ ::Equal(%"tmp_0__663", %"tmp_1__663")
4 | # n1__661_291
%"dim_3__661"<INT64,?> ⬅️ ::If(%"cond_0__661") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__661"<INT64,[1,1]>
),
) {
0 | # n0__661_292
%"int64_0__661"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__661_293
%"dim_1__661"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__659", %"int64_0__661")
return %"dim_1__661"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__661"<INT64,[1]>
),
) {
0 | # n0__661_294
%"dim_2__661"<INT64,[1]> ⬅️ ::Identity(%"_val_5__659")
return %"dim_2__661"<INT64,[1]>
}}
5 | # n2__661
%"result_4__661"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_14__659", %"dim_3__661") {keepdims=1}
return %"result_4__661"<FLOAT,?>
}}
3778 | # Constant_9__659
%"_val_7__659"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
3779 | # n0__664
%"alpha__664"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
3780 | # n1__664
%"alpha_0__664"<FLOAT,?> ⬅️ ::CastLike(%"alpha__664", %"_val_7__659")
3781 | # n2__664
%"other_1__664"<FLOAT,?> ⬅️ ::Mul(%"_val_7__659", %"alpha_0__664")
3782 | # n3__664
%"add_40__659"<FLOAT,?> ⬅️ ::Add(%"mean_13__659", %"other_1__664")
3783 | # n0__665
%"tmp__665"<FLOAT,?> ⬅️ ::Sqrt(%"add_40__659")
3784 | # n1__665
%"rsqrt_13__659"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__665")
3785 | # n0__666
%"mul_66__659"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_38__659", %"rsqrt_13__659")
3786 | # Cast_13__659
%"_to_copy_39__659"<FLOAT16,?> ⬅️ ::Cast(%"mul_66__659") {to=10}
3787 | # n0__667
%"model_layers_6_post_attention_layernorm_1__594"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.6.post_attention_layernorm.weight", %"_to_copy_39__659")
3788 | # n0__671
%"tmp__671"<INT64,[2]> ⬅️ ::Shape(%"model.layers.6.mlp.gate_proj.weight")
3789 | # n1__671
%"rank__670"<INT64,?> ⬅️ ::Size(%"tmp__671")
3790 | # n1__670
%"int64_2__670"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3791 | # n2__670
%"int64_2_cast__670"<INT64,?> ⬅️ ::CastLike(%"int64_2__670", %"rank__670")
3792 | # n3__670
%"cond__670"<BOOL,?> ⬅️ ::Equal(%"rank__670", %"int64_2_cast__670")
3793 | # n4__670
%"t_46__669"<FLOAT16,[unk__957,unk__958]> ⬅️ ::If(%"cond__670") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__670"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__670_295
%"result__670"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.6.mlp.gate_proj.weight") {perm=[1, 0]}
return %"result__670"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__670"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__670_296
%"result_0__670"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.6.mlp.gate_proj.weight")
return %"result_0__670"<FLOAT16,[14336,4096]>
}}
3794 | # Constant_3__669
%"_val_3__669"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3795 | # n0__672
%"size_0__672"<INT64,[2]> ⬅️ ::Cast(%"_val_3__669") {to=7}
3796 | # n1__672
%"view_135__669"<FLOAT16,[unk__959,unk__960]> ⬅️ ::Reshape(%"model_layers_6_post_attention_layernorm_1__594", %"size_0__672")
3797 | # n0__673
%"mm_46__669"<FLOAT16,[unk__959,unk__958]> ⬅️ ::MatMul(%"view_135__669", %"t_46__669")
3798 | # Constant_6__669
%"_val_6__669"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3799 | # n0__674
%"size_0__674"<INT64,[3]> ⬅️ ::Cast(%"_val_6__669") {to=7}
3800 | # n1__674
%"model_layers_6_mlp_gate_proj_1__668"<FLOAT16,[unk__961,unk__962,unk__963]> ⬅️ ::Reshape(%"mm_46__669", %"size_0__674")
3801 | # Cast_0__675
%"_to_copy_40__675"<FLOAT,[unk__961,unk__962,unk__963]> ⬅️ ::Cast(%"model_layers_6_mlp_gate_proj_1__668") {to=1}
3802 | # n0__676
%"sigmoid_6__675"<FLOAT,[unk__961,unk__962,unk__963]> ⬅️ ::Sigmoid(%"_to_copy_40__675")
3803 | # n0__677
%"mul_68__675"<FLOAT,[unk__961,unk__962,unk__963]> ⬅️ ::Mul(%"_to_copy_40__675", %"sigmoid_6__675")
3804 | # Cast_3__675
%"model_layers_6_mlp_act_fn_1__668"<FLOAT16,[unk__961,unk__962,unk__963]> ⬅️ ::Cast(%"mul_68__675") {to=10}
3805 | # n0__680
%"tmp__680"<INT64,[2]> ⬅️ ::Shape(%"model.layers.6.mlp.up_proj.weight")
3806 | # n1__680
%"rank__679"<INT64,?> ⬅️ ::Size(%"tmp__680")
3807 | # n1__679
%"int64_2__679"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3808 | # n2__679
%"int64_2_cast__679"<INT64,?> ⬅️ ::CastLike(%"int64_2__679", %"rank__679")
3809 | # n3__679
%"cond__679"<BOOL,?> ⬅️ ::Equal(%"rank__679", %"int64_2_cast__679")
3810 | # n4__679
%"t_47__678"<FLOAT16,[unk__964,unk__965]> ⬅️ ::If(%"cond__679") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__679"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__679_297
%"result__679"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.6.mlp.up_proj.weight") {perm=[1, 0]}
return %"result__679"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__679"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__679_298
%"result_0__679"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.6.mlp.up_proj.weight")
return %"result_0__679"<FLOAT16,[14336,4096]>
}}
3811 | # Constant_3__678
%"_val_3__678"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3812 | # n0__681
%"size_0__681"<INT64,[2]> ⬅️ ::Cast(%"_val_3__678") {to=7}
3813 | # n1__681
%"view_137__678"<FLOAT16,[unk__966,unk__967]> ⬅️ ::Reshape(%"model_layers_6_post_attention_layernorm_1__594", %"size_0__681")
3814 | # n0__682
%"mm_47__678"<FLOAT16,[unk__966,unk__965]> ⬅️ ::MatMul(%"view_137__678", %"t_47__678")
3815 | # Constant_6__678
%"_val_6__678"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3816 | # n0__683
%"size_0__683"<INT64,[3]> ⬅️ ::Cast(%"_val_6__678") {to=7}
3817 | # n1__683
%"model_layers_6_mlp_up_proj_1__668"<FLOAT16,[unk__968,unk__969,unk__970]> ⬅️ ::Reshape(%"mm_47__678", %"size_0__683")
3818 | # n0__684
%"mul_69__668"<FLOAT16,[unk__971,unk__972,unk__973]> ⬅️ ::Mul(%"model_layers_6_mlp_act_fn_1__668", %"model_layers_6_mlp_up_proj_1__668")
3819 | # n0__687
%"tmp__687"<INT64,[2]> ⬅️ ::Shape(%"model.layers.6.mlp.down_proj.weight")
3820 | # n1__687
%"rank__686"<INT64,?> ⬅️ ::Size(%"tmp__687")
3821 | # n1__686
%"int64_2__686"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3822 | # n2__686
%"int64_2_cast__686"<INT64,?> ⬅️ ::CastLike(%"int64_2__686", %"rank__686")
3823 | # n3__686
%"cond__686"<BOOL,?> ⬅️ ::Equal(%"rank__686", %"int64_2_cast__686")
3824 | # n4__686
%"t_48__685"<FLOAT16,[unk__974,unk__975]> ⬅️ ::If(%"cond__686") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__686"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__686_299
%"result__686"<FLOAT16,[14336,4096]> ⬅️ ::Transpose(%"model.layers.6.mlp.down_proj.weight") {perm=[1, 0]}
return %"result__686"<FLOAT16,[14336,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__686"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__686_300
%"result_0__686"<FLOAT16,[4096,14336]> ⬅️ ::Identity(%"model.layers.6.mlp.down_proj.weight")
return %"result_0__686"<FLOAT16,[4096,14336]>
}}
3825 | # Constant_3__685
%"_val_3__685"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3826 | # n0__688
%"size_0__688"<INT64,[2]> ⬅️ ::Cast(%"_val_3__685") {to=7}
3827 | # n1__688
%"view_139__685"<FLOAT16,[unk__976,unk__977]> ⬅️ ::Reshape(%"mul_69__668", %"size_0__688")
3828 | # n0__689
%"mm_48__685"<FLOAT16,[unk__976,unk__975]> ⬅️ ::MatMul(%"view_139__685", %"t_48__685")
3829 | # Constant_6__685
%"_val_6__685"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3830 | # n0__690
%"size_0__690"<INT64,[3]> ⬅️ ::Cast(%"_val_6__685") {to=7}
3831 | # n1__690
%"model_layers_6_mlp_1__594"<FLOAT16,[unk__978,unk__979,unk__980]> ⬅️ ::Reshape(%"mm_48__685", %"size_0__690")
3832 | # n0__691
%"alpha__691"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
3833 | # n1__691
%"alpha_0__691"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__691", %"model_layers_6_mlp_1__594")
3834 | # n2__691
%"other_1__691"<FLOAT16,[unk__978,unk__979,unk__980]> ⬅️ ::Mul(%"model_layers_6_mlp_1__594", %"alpha_0__691")
3835 | # n3__691
%"model_layers_6_1_2__1"<FLOAT16,[unk__981,128,4096]> ⬅️ ::Add(%"add_39__594", %"other_1__691")
3836 | # Cast_3__693
%"_to_copy_42__693"<FLOAT,[unk__981,128,4096]> ⬅️ ::Cast(%"model_layers_6_1_2__1") {to=1}
3837 | # Constant_4__693
%"_val_2__693"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3838 | # Cast_5__693
%"scalar_tensor_default_14__693"<FLOAT,?> ⬅️ ::Cast(%"_val_2__693") {to=1}
3839 | # n0__694
%"pow_15__693"<FLOAT,[unk__981,128,4096]> ⬅️ ::Pow(%"_to_copy_42__693", %"scalar_tensor_default_14__693")
3840 | # Constant_7__693
%"_val_5__693"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
3841 | # n0__696
%"tmp__696"<INT64,[3]> ⬅️ ::Shape(%"pow_15__693")
3842 | # n1__696
%"tmp_0__696"<INT64,?> ⬅️ ::Size(%"tmp__696")
3843 | # n2__696
%"tmp_1__696"<INT64,?> ⬅️ ::Constant() {value_int=0}
3844 | # n3__696
%"cond__695"<BOOL,?> ⬅️ ::Equal(%"tmp_0__696", %"tmp_1__696")
3845 | # n1__695
%"mean_14__693"<FLOAT,?> ⬅️ ::If(%"cond__695") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__695"<FLOAT,[unk__981,128,4096]>
),
) {
0 | # n0__695_301
%"result__695"<FLOAT,[unk__981,128,4096]> ⬅️ ::Identity(%"pow_15__693")
return %"result__695"<FLOAT,[unk__981,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__695"<FLOAT,?>
),
) {
0 | # n0__697
%"tmp__697"<INT64,[1]> ⬅️ ::Shape(%"_val_5__693")
1 | # n1__697
%"tmp_0__697"<INT64,?> ⬅️ ::Size(%"tmp__697")
2 | # n2__697
%"tmp_1__697"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__697
%"cond_0__695"<BOOL,?> ⬅️ ::Equal(%"tmp_0__697", %"tmp_1__697")
4 | # n1__695_303
%"dim_3__695"<INT64,?> ⬅️ ::If(%"cond_0__695") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__695"<INT64,[1,1]>
),
) {
0 | # n0__695_304
%"int64_0__695"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__695_305
%"dim_1__695"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__693", %"int64_0__695")
return %"dim_1__695"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__695"<INT64,[1]>
),
) {
0 | # n0__695_306
%"dim_2__695"<INT64,[1]> ⬅️ ::Identity(%"_val_5__693")
return %"dim_2__695"<INT64,[1]>
}}
5 | # n2__695
%"result_4__695"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_15__693", %"dim_3__695") {keepdims=1}
return %"result_4__695"<FLOAT,?>
}}
3846 | # Constant_9__693
%"_val_7__693"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
3847 | # n0__698
%"alpha__698"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
3848 | # n1__698
%"alpha_0__698"<FLOAT,?> ⬅️ ::CastLike(%"alpha__698", %"_val_7__693")
3849 | # n2__698
%"other_1__698"<FLOAT,?> ⬅️ ::Mul(%"_val_7__693", %"alpha_0__698")
3850 | # n3__698
%"add_42__693"<FLOAT,?> ⬅️ ::Add(%"mean_14__693", %"other_1__698")
3851 | # n0__699
%"tmp__699"<FLOAT,?> ⬅️ ::Sqrt(%"add_42__693")
3852 | # n1__699
%"rsqrt_14__693"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__699")
3853 | # n0__700
%"mul_70__693"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_42__693", %"rsqrt_14__693")
3854 | # Cast_13__693
%"_to_copy_43__693"<FLOAT16,?> ⬅️ ::Cast(%"mul_70__693") {to=10}
3855 | # n0__701
%"model_layers_7_input_layernorm_1__692"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.7.input_layernorm.weight", %"_to_copy_43__693")
3856 | # n0__705
%"tmp__705"<INT64,[2]> ⬅️ ::Shape(%"model.layers.7.self_attn.q_proj.weight")
3857 | # n1__705
%"rank__704"<INT64,?> ⬅️ ::Size(%"tmp__705")
3858 | # n1__704
%"int64_2__704"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3859 | # n2__704
%"int64_2_cast__704"<INT64,?> ⬅️ ::CastLike(%"int64_2__704", %"rank__704")
3860 | # n3__704
%"cond__704"<BOOL,?> ⬅️ ::Equal(%"rank__704", %"int64_2_cast__704")
3861 | # n4__704
%"t_49__703"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__704") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__704"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__704_307
%"result__704"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.7.self_attn.q_proj.weight") {perm=[1, 0]}
return %"result__704"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__704"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__704_308
%"result_0__704"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.7.self_attn.q_proj.weight")
return %"result_0__704"<FLOAT16,[4096,4096]>
}}
3862 | # Constant_3__703
%"_val_3__703"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3863 | # n0__706
%"size_0__706"<INT64,[2]> ⬅️ ::Cast(%"_val_3__703") {to=7}
3864 | # n1__706
%"view_141__703"<FLOAT16,[unk__982,unk__983]> ⬅️ ::Reshape(%"model_layers_7_input_layernorm_1__692", %"size_0__706")
3865 | # n0__707
%"mm_49__703"<FLOAT16,[unk__982,4096]> ⬅️ ::MatMul(%"view_141__703", %"t_49__703")
3866 | # Constant_6__703
%"_val_6__703"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3867 | # n0__708
%"size_0__708"<INT64,[3]> ⬅️ ::Cast(%"_val_6__703") {to=7}
3868 | # n1__708
%"model_layers_7_self_attn_q_proj_1__702"<FLOAT16,[unk__984,unk__985,unk__986]> ⬅️ ::Reshape(%"mm_49__703", %"size_0__708")
3869 | # n0__711
%"tmp__711"<INT64,[2]> ⬅️ ::Shape(%"model.layers.7.self_attn.k_proj.weight")
3870 | # n1__711
%"rank__710"<INT64,?> ⬅️ ::Size(%"tmp__711")
3871 | # n1__710
%"int64_2__710"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3872 | # n2__710
%"int64_2_cast__710"<INT64,?> ⬅️ ::CastLike(%"int64_2__710", %"rank__710")
3873 | # n3__710
%"cond__710"<BOOL,?> ⬅️ ::Equal(%"rank__710", %"int64_2_cast__710")
3874 | # n4__710
%"t_50__709"<FLOAT16,[unk__987,unk__988]> ⬅️ ::If(%"cond__710") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__710"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__710_309
%"result__710"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.7.self_attn.k_proj.weight") {perm=[1, 0]}
return %"result__710"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__710"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__710_310
%"result_0__710"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.7.self_attn.k_proj.weight")
return %"result_0__710"<FLOAT16,[1024,4096]>
}}
3875 | # Constant_3__709
%"_val_3__709"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3876 | # n0__712
%"size_0__712"<INT64,[2]> ⬅️ ::Cast(%"_val_3__709") {to=7}
3877 | # n1__712
%"view_143__709"<FLOAT16,[unk__989,unk__990]> ⬅️ ::Reshape(%"model_layers_7_input_layernorm_1__692", %"size_0__712")
3878 | # n0__713
%"mm_50__709"<FLOAT16,[unk__989,unk__988]> ⬅️ ::MatMul(%"view_143__709", %"t_50__709")
3879 | # Constant_6__709
%"_val_6__709"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3880 | # n0__714
%"size_0__714"<INT64,[3]> ⬅️ ::Cast(%"_val_6__709") {to=7}
3881 | # n1__714
%"model_layers_7_self_attn_k_proj_1__702"<FLOAT16,[unk__991,unk__992,unk__993]> ⬅️ ::Reshape(%"mm_50__709", %"size_0__714")
3882 | # n0__717
%"tmp__717"<INT64,[2]> ⬅️ ::Shape(%"model.layers.7.self_attn.v_proj.weight")
3883 | # n1__717
%"rank__716"<INT64,?> ⬅️ ::Size(%"tmp__717")
3884 | # n1__716
%"int64_2__716"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
3885 | # n2__716
%"int64_2_cast__716"<INT64,?> ⬅️ ::CastLike(%"int64_2__716", %"rank__716")
3886 | # n3__716
%"cond__716"<BOOL,?> ⬅️ ::Equal(%"rank__716", %"int64_2_cast__716")
3887 | # n4__716
%"t_51__715"<FLOAT16,[unk__994,unk__995]> ⬅️ ::If(%"cond__716") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__716"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__716_311
%"result__716"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.7.self_attn.v_proj.weight") {perm=[1, 0]}
return %"result__716"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__716"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__716_312
%"result_0__716"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.7.self_attn.v_proj.weight")
return %"result_0__716"<FLOAT16,[1024,4096]>
}}
3888 | # Constant_3__715
%"_val_3__715"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
3889 | # n0__718
%"size_0__718"<INT64,[2]> ⬅️ ::Cast(%"_val_3__715") {to=7}
3890 | # n1__718
%"view_145__715"<FLOAT16,[unk__996,unk__997]> ⬅️ ::Reshape(%"model_layers_7_input_layernorm_1__692", %"size_0__718")
3891 | # n0__719
%"mm_51__715"<FLOAT16,[unk__996,unk__995]> ⬅️ ::MatMul(%"view_145__715", %"t_51__715")
3892 | # Constant_6__715
%"_val_6__715"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
3893 | # n0__720
%"size_0__720"<INT64,[3]> ⬅️ ::Cast(%"_val_6__715") {to=7}
3894 | # n1__720
%"model_layers_7_self_attn_v_proj_1__702"<FLOAT16,[unk__998,unk__999,unk__1000]> ⬅️ ::Reshape(%"mm_51__715", %"size_0__720")
3895 | # Constant_61__702
%"_val_8__702"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
3896 | # n0__721
%"size_0__721"<INT64,[4]> ⬅️ ::Cast(%"_val_8__702") {to=7}
3897 | # n1__721
%"view_147__702"<FLOAT16,[unk__1001,unk__1002,unk__1003,unk__1004]> ⬅️ ::Reshape(%"model_layers_7_self_attn_q_proj_1__702", %"size_0__721")
3898 | # Transpose_63__702
%"transpose_28__702"<FLOAT16,[unk__1001,unk__1003,unk__1002,unk__1004]> ⬅️ ::Transpose(%"view_147__702") {perm=[0, 2, 1, 3]}
3899 | # Constant_64__702
%"_val_11__702"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
3900 | # n0__722
%"size_0__722"<INT64,[4]> ⬅️ ::Cast(%"_val_11__702") {to=7}
3901 | # n1__722
%"view_148__702"<FLOAT16,[unk__1005,unk__1006,unk__1007,unk__1008]> ⬅️ ::Reshape(%"model_layers_7_self_attn_k_proj_1__702", %"size_0__722")
3902 | # Transpose_66__702
%"transpose_29__702"<FLOAT16,[unk__1005,unk__1007,unk__1006,unk__1008]> ⬅️ ::Transpose(%"view_148__702") {perm=[0, 2, 1, 3]}
3903 | # Constant_67__702
%"_val_14__702"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
3904 | # n0__723
%"size_0__723"<INT64,[4]> ⬅️ ::Cast(%"_val_14__702") {to=7}
3905 | # n1__723
%"view_149__702"<FLOAT16,[unk__1009,unk__1010,unk__1011,unk__1012]> ⬅️ ::Reshape(%"model_layers_7_self_attn_v_proj_1__702", %"size_0__723")
3906 | # Transpose_69__702
%"model_1_14"<FLOAT16,[1,8,128,128]> ⬅️ ::Transpose(%"view_149__702") {perm=[0, 2, 1, 3]}
3907 | # Constant_8__724
%"_val_1__724"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3908 | # Cast_9__724
%"_val_2__724"<INT64,?> ⬅️ ::Cast(%"_val_1__724") {to=7}
3909 | # Constant_10__724
%"_val_3__724"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3910 | # Reshape_11__724
%"_val_4__724"<INT64,[1]> ⬅️ ::Reshape(%"_val_2__724", %"_val_3__724") {allowzero=0}
3911 | # Constant_12__724
%"_val_5__724"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3912 | # Cast_13__724
%"_val_6__724"<INT64,?> ⬅️ ::Cast(%"_val_5__724") {to=7}
3913 | # Constant_14__724
%"_val_7__724"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3914 | # Reshape_15__724
%"_val_8__724"<INT64,[1]> ⬅️ ::Reshape(%"_val_6__724", %"_val_7__724") {allowzero=0}
3915 | # Constant_16__724
%"_val_9__724"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3916 | # Cast_17__724
%"_val_10__724"<INT64,?> ⬅️ ::Cast(%"_val_9__724") {to=7}
3917 | # Constant_18__724
%"_val_11__724"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3918 | # Reshape_19__724
%"_val_12__724"<INT64,[1]> ⬅️ ::Reshape(%"_val_10__724", %"_val_11__724") {allowzero=0}
3919 | # Constant_20__724
%"_val_13__724"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3920 | # Cast_21__724
%"_val_14__724"<INT64,?> ⬅️ ::Cast(%"_val_13__724") {to=7}
3921 | # Constant_22__724
%"_val_15__724"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3922 | # Reshape_23__724
%"_val_16__724"<INT64,[1]> ⬅️ ::Reshape(%"_val_14__724", %"_val_15__724") {allowzero=0}
3923 | # Slice_24__724
%"model_layers_7_self_attn_rotary_emb_1__702"<FLOAT16,[unk__1013,unk__1014]> ⬅️ ::Slice(%"model.layers.7.self_attn.rotary_emb.cos_cached", %"_val_4__724", %"_val_8__724", %"_val_12__724", %"_val_16__724")
3924 | # Constant_25__724
%"_val_19__724"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3925 | # Cast_26__724
%"_val_20__724"<INT64,?> ⬅️ ::Cast(%"_val_19__724") {to=7}
3926 | # Constant_27__724
%"_val_21__724"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3927 | # Reshape_28__724
%"_val_22__724"<INT64,[1]> ⬅️ ::Reshape(%"_val_20__724", %"_val_21__724") {allowzero=0}
3928 | # Constant_29__724
%"_val_23__724"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3929 | # Cast_30__724
%"_val_24__724"<INT64,?> ⬅️ ::Cast(%"_val_23__724") {to=7}
3930 | # Constant_31__724
%"_val_25__724"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3931 | # Reshape_32__724
%"_val_26__724"<INT64,[1]> ⬅️ ::Reshape(%"_val_24__724", %"_val_25__724") {allowzero=0}
3932 | # Constant_33__724
%"_val_27__724"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3933 | # Cast_34__724
%"_val_28__724"<INT64,?> ⬅️ ::Cast(%"_val_27__724") {to=7}
3934 | # Constant_35__724
%"_val_29__724"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3935 | # Reshape_36__724
%"_val_30__724"<INT64,[1]> ⬅️ ::Reshape(%"_val_28__724", %"_val_29__724") {allowzero=0}
3936 | # Constant_37__724
%"_val_31__724"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3937 | # Cast_38__724
%"_val_32__724"<INT64,?> ⬅️ ::Cast(%"_val_31__724") {to=7}
3938 | # Constant_39__724
%"_val_33__724"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3939 | # Reshape_40__724
%"_val_34__724"<INT64,[1]> ⬅️ ::Reshape(%"_val_32__724", %"_val_33__724") {allowzero=0}
3940 | # Slice_41__724
%"model_layers_7_self_attn_rotary_emb_1_1__702"<FLOAT16,[unk__1015,unk__1016]> ⬅️ ::Slice(%"model.layers.7.self_attn.rotary_emb.sin_cached", %"_val_22__724", %"_val_26__724", %"_val_30__724", %"_val_34__724")
3941 | # Transpose_71__702
%"_val_21__702"<FLOAT16,[unk__1013,unk__1014]> ⬅️ ::Transpose(%"model_layers_7_self_attn_rotary_emb_1__702") {perm=[0, 1]}
3942 | # Max_72__702
%"_val_22__702"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
3943 | # Shape_73__702
%"_val_23__702"<INT64,[2]> ⬅️ ::Shape(%"_val_22__702") {start=0}
3944 | # Expand_74__702
%"_val_24__702"<INT64,[unk__1017,unk__1018]> ⬅️ ::Expand(%"view__1", %"_val_23__702")
3945 | # Constant_75__702
%"_val_25__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3946 | # Unsqueeze_76__702
%"_val_26__702"<INT64,[unk__1017,unk__1018,1]> ⬅️ ::Unsqueeze(%"_val_24__702", %"_val_25__702")
3947 | # Concat_77__702
%"_val_27__702"<INT64,[unk__1017,unk__1018,1]> ⬅️ ::Concat(%"_val_26__702") {axis=-1}
3948 | # GatherND_78__702
%"_val_28__702"<FLOAT16,[unk__1017,unk__1018,unk__1014]> ⬅️ ::GatherND(%"_val_21__702", %"_val_27__702") {batch_dims=0}
3949 | # Transpose_79__702
%"index_14__702"<FLOAT16,[unk__1017,unk__1018,unk__1014]> ⬅️ ::Transpose(%"_val_28__702") {perm=[0, 1, 2]}
3950 | # n0__725
%"dim__725"<INT64,?> ⬅️ ::Constant() {value_int=1}
3951 | # n1__725
%"dim_0__725"<INT64,?> ⬅️ ::Cast(%"dim__725") {to=7}
3952 | # n2__725
%"unsqueeze_29__702"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_14__702", %"dim_0__725")
3953 | # Transpose_81__702
%"_val_31__702"<FLOAT16,[unk__1015,unk__1016]> ⬅️ ::Transpose(%"model_layers_7_self_attn_rotary_emb_1_1__702") {perm=[0, 1]}
3954 | # Max_82__702
%"_val_32__702"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
3955 | # Shape_83__702
%"_val_33__702"<INT64,[2]> ⬅️ ::Shape(%"_val_32__702") {start=0}
3956 | # Expand_84__702
%"_val_34__702"<INT64,[unk__1019,unk__1020]> ⬅️ ::Expand(%"view__1", %"_val_33__702")
3957 | # Constant_85__702
%"_val_35__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3958 | # Unsqueeze_86__702
%"_val_36__702"<INT64,[unk__1019,unk__1020,1]> ⬅️ ::Unsqueeze(%"_val_34__702", %"_val_35__702")
3959 | # Concat_87__702
%"_val_37__702"<INT64,[unk__1019,unk__1020,1]> ⬅️ ::Concat(%"_val_36__702") {axis=-1}
3960 | # GatherND_88__702
%"_val_38__702"<FLOAT16,[unk__1019,unk__1020,unk__1016]> ⬅️ ::GatherND(%"_val_31__702", %"_val_37__702") {batch_dims=0}
3961 | # Transpose_89__702
%"index_15__702"<FLOAT16,[unk__1019,unk__1020,unk__1016]> ⬅️ ::Transpose(%"_val_38__702") {perm=[0, 1, 2]}
3962 | # n0__726
%"dim__726"<INT64,?> ⬅️ ::Constant() {value_int=1}
3963 | # n1__726
%"dim_0__726"<INT64,?> ⬅️ ::Cast(%"dim__726") {to=7}
3964 | # n2__726
%"unsqueeze_30__702"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_15__702", %"dim_0__726")
3965 | # n0__727
%"mul_72__702"<FLOAT16,?> ⬅️ ::Mul(%"transpose_28__702", %"unsqueeze_29__702")
3966 | # Constant_92__702
%"_val_42__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3967 | # Cast_93__702
%"_val_43__702"<INT64,?> ⬅️ ::Cast(%"_val_42__702") {to=7}
3968 | # Constant_94__702
%"_val_44__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3969 | # Reshape_95__702
%"_val_45__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_43__702", %"_val_44__702") {allowzero=0}
3970 | # Constant_96__702
%"_val_46__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3971 | # Cast_97__702
%"_val_47__702"<INT64,?> ⬅️ ::Cast(%"_val_46__702") {to=7}
3972 | # Constant_98__702
%"_val_48__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3973 | # Reshape_99__702
%"_val_49__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_47__702", %"_val_48__702") {allowzero=0}
3974 | # Constant_100__702
%"_val_50__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3975 | # Cast_101__702
%"_val_51__702"<INT64,?> ⬅️ ::Cast(%"_val_50__702") {to=7}
3976 | # Constant_102__702
%"_val_52__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3977 | # Reshape_103__702
%"_val_53__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_51__702", %"_val_52__702") {allowzero=0}
3978 | # Constant_104__702
%"_val_54__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3979 | # Cast_105__702
%"_val_55__702"<INT64,?> ⬅️ ::Cast(%"_val_54__702") {to=7}
3980 | # Constant_106__702
%"_val_56__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3981 | # Reshape_107__702
%"_val_57__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_55__702", %"_val_56__702") {allowzero=0}
3982 | # Slice_108__702
%"slice_101__702"<FLOAT16,[unk__1021,unk__1022,unk__1023,unk__1024]> ⬅️ ::Slice(%"transpose_28__702", %"_val_45__702", %"_val_49__702", %"_val_53__702", %"_val_57__702")
3983 | # Constant_109__702
%"_val_59__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3984 | # Cast_110__702
%"_val_60__702"<INT64,?> ⬅️ ::Cast(%"_val_59__702") {to=7}
3985 | # Constant_111__702
%"_val_61__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3986 | # Reshape_112__702
%"_val_62__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_60__702", %"_val_61__702") {allowzero=0}
3987 | # Constant_113__702
%"_val_63__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3988 | # Cast_114__702
%"_val_64__702"<INT64,?> ⬅️ ::Cast(%"_val_63__702") {to=7}
3989 | # Constant_115__702
%"_val_65__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3990 | # Reshape_116__702
%"_val_66__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_64__702", %"_val_65__702") {allowzero=0}
3991 | # Constant_117__702
%"_val_67__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3992 | # Cast_118__702
%"_val_68__702"<INT64,?> ⬅️ ::Cast(%"_val_67__702") {to=7}
3993 | # Constant_119__702
%"_val_69__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3994 | # Reshape_120__702
%"_val_70__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_68__702", %"_val_69__702") {allowzero=0}
3995 | # Constant_121__702
%"_val_71__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
3996 | # Cast_122__702
%"_val_72__702"<INT64,?> ⬅️ ::Cast(%"_val_71__702") {to=7}
3997 | # Constant_123__702
%"_val_73__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
3998 | # Reshape_124__702
%"_val_74__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_72__702", %"_val_73__702") {allowzero=0}
3999 | # Slice_125__702
%"slice_102__702"<FLOAT16,[unk__1025,unk__1026,unk__1027,unk__1028]> ⬅️ ::Slice(%"transpose_28__702", %"_val_62__702", %"_val_66__702", %"_val_70__702", %"_val_74__702")
4000 | # n0__728
%"neg_14__702"<FLOAT16,[unk__1025,unk__1026,unk__1027,unk__1028]> ⬅️ ::Neg(%"slice_102__702")
4001 | # SequenceConstruct_127__702
%"77__702"<Sequence(Tensor(FLOAT16)),[unk__1029,unk__1030,unk__1031,unk__1032]> ⬅️ ::SequenceConstruct(%"neg_14__702", %"slice_101__702")
4002 | # n0__729
%"cat_14__702"<FLOAT16,[unk__1029,unk__1030,unk__1031,unk__1033]> ⬅️ ::ConcatFromSequence(%"77__702") {axis=-1}
4003 | # n0__730
%"mul_73__702"<FLOAT16,?> ⬅️ ::Mul(%"cat_14__702", %"unsqueeze_30__702")
4004 | # n0__731
%"alpha__731"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
4005 | # n1__731
%"alpha_0__731"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__731", %"mul_73__702")
4006 | # n2__731
%"other_1__731"<FLOAT16,?> ⬅️ ::Mul(%"mul_73__702", %"alpha_0__731")
4007 | # n3__731
%"add_43__702"<FLOAT16,?> ⬅️ ::Add(%"mul_72__702", %"other_1__731")
4008 | # n0__732
%"mul_74__702"<FLOAT16,?> ⬅️ ::Mul(%"transpose_29__702", %"unsqueeze_29__702")
4009 | # Constant_132__702
%"_val_82__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4010 | # Cast_133__702
%"_val_83__702"<INT64,?> ⬅️ ::Cast(%"_val_82__702") {to=7}
4011 | # Constant_134__702
%"_val_84__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4012 | # Reshape_135__702
%"_val_85__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_83__702", %"_val_84__702") {allowzero=0}
4013 | # Constant_136__702
%"_val_86__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4014 | # Cast_137__702
%"_val_87__702"<INT64,?> ⬅️ ::Cast(%"_val_86__702") {to=7}
4015 | # Constant_138__702
%"_val_88__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4016 | # Reshape_139__702
%"_val_89__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_87__702", %"_val_88__702") {allowzero=0}
4017 | # Constant_140__702
%"_val_90__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4018 | # Cast_141__702
%"_val_91__702"<INT64,?> ⬅️ ::Cast(%"_val_90__702") {to=7}
4019 | # Constant_142__702
%"_val_92__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4020 | # Reshape_143__702
%"_val_93__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_91__702", %"_val_92__702") {allowzero=0}
4021 | # Constant_144__702
%"_val_94__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4022 | # Cast_145__702
%"_val_95__702"<INT64,?> ⬅️ ::Cast(%"_val_94__702") {to=7}
4023 | # Constant_146__702
%"_val_96__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4024 | # Reshape_147__702
%"_val_97__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_95__702", %"_val_96__702") {allowzero=0}
4025 | # Slice_148__702
%"slice_103__702"<FLOAT16,[unk__1034,unk__1035,unk__1036,unk__1037]> ⬅️ ::Slice(%"transpose_29__702", %"_val_85__702", %"_val_89__702", %"_val_93__702", %"_val_97__702")
4026 | # Constant_149__702
%"_val_99__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4027 | # Cast_150__702
%"_val_100__702"<INT64,?> ⬅️ ::Cast(%"_val_99__702") {to=7}
4028 | # Constant_151__702
%"_val_101__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4029 | # Reshape_152__702
%"_val_102__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_100__702", %"_val_101__702") {allowzero=0}
4030 | # Constant_153__702
%"_val_103__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4031 | # Cast_154__702
%"_val_104__702"<INT64,?> ⬅️ ::Cast(%"_val_103__702") {to=7}
4032 | # Constant_155__702
%"_val_105__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4033 | # Reshape_156__702
%"_val_106__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_104__702", %"_val_105__702") {allowzero=0}
4034 | # Constant_157__702
%"_val_107__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4035 | # Cast_158__702
%"_val_108__702"<INT64,?> ⬅️ ::Cast(%"_val_107__702") {to=7}
4036 | # Constant_159__702
%"_val_109__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4037 | # Reshape_160__702
%"_val_110__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_108__702", %"_val_109__702") {allowzero=0}
4038 | # Constant_161__702
%"_val_111__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4039 | # Cast_162__702
%"_val_112__702"<INT64,?> ⬅️ ::Cast(%"_val_111__702") {to=7}
4040 | # Constant_163__702
%"_val_113__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4041 | # Reshape_164__702
%"_val_114__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_112__702", %"_val_113__702") {allowzero=0}
4042 | # Slice_165__702
%"slice_104__702"<FLOAT16,[unk__1038,unk__1039,unk__1040,unk__1041]> ⬅️ ::Slice(%"transpose_29__702", %"_val_102__702", %"_val_106__702", %"_val_110__702", %"_val_114__702")
4043 | # n0__733
%"neg_15__702"<FLOAT16,[unk__1038,unk__1039,unk__1040,unk__1041]> ⬅️ ::Neg(%"slice_104__702")
4044 | # SequenceConstruct_167__702
%"117__702"<Sequence(Tensor(FLOAT16)),[unk__1042,unk__1043,unk__1044,unk__1045]> ⬅️ ::SequenceConstruct(%"neg_15__702", %"slice_103__702")
4045 | # n0__734
%"cat_15__702"<FLOAT16,[unk__1042,unk__1043,unk__1044,unk__1046]> ⬅️ ::ConcatFromSequence(%"117__702") {axis=-1}
4046 | # n0__735
%"mul_75__702"<FLOAT16,?> ⬅️ ::Mul(%"cat_15__702", %"unsqueeze_30__702")
4047 | # n0__736
%"alpha__736"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
4048 | # n1__736
%"alpha_0__736"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__736", %"mul_75__702")
4049 | # n2__736
%"other_1__736"<FLOAT16,?> ⬅️ ::Mul(%"mul_75__702", %"alpha_0__736")
4050 | # n3__736
%"model_1_15"<FLOAT16,[1,8,128,128]> ⬅️ ::Add(%"mul_74__702", %"other_1__736")
4051 | # Constant_171__702
%"_val_121__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4052 | # Cast_172__702
%"_val_122__702"<INT64,?> ⬅️ ::Cast(%"_val_121__702") {to=7}
4053 | # Constant_173__702
%"_val_123__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4054 | # Reshape_174__702
%"_val_124__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_122__702", %"_val_123__702") {allowzero=0}
4055 | # Constant_175__702
%"_val_125__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4056 | # Cast_176__702
%"_val_126__702"<INT64,?> ⬅️ ::Cast(%"_val_125__702") {to=7}
4057 | # Constant_177__702
%"_val_127__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4058 | # Reshape_178__702
%"_val_128__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_126__702", %"_val_127__702") {allowzero=0}
4059 | # Constant_179__702
%"_val_129__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4060 | # Cast_180__702
%"_val_130__702"<INT64,?> ⬅️ ::Cast(%"_val_129__702") {to=7}
4061 | # Constant_181__702
%"_val_131__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4062 | # Reshape_182__702
%"_val_132__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_130__702", %"_val_131__702") {allowzero=0}
4063 | # Constant_183__702
%"_val_133__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4064 | # Cast_184__702
%"_val_134__702"<INT64,?> ⬅️ ::Cast(%"_val_133__702") {to=7}
4065 | # Constant_185__702
%"_val_135__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4066 | # Reshape_186__702
%"_val_136__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_134__702", %"_val_135__702") {allowzero=0}
4067 | # Slice_187__702
%"slice_105__702"<FLOAT16,[unk__1047,unk__1048,unk__1049,unk__1050]> ⬅️ ::Slice(%"model_1_15", %"_val_124__702", %"_val_128__702", %"_val_132__702", %"_val_136__702")
4068 | # Constant_188__702
%"_val_138__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4069 | # Cast_189__702
%"_val_139__702"<INT64,?> ⬅️ ::Cast(%"_val_138__702") {to=7}
4070 | # Constant_190__702
%"_val_140__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4071 | # Reshape_191__702
%"_val_141__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_139__702", %"_val_140__702") {allowzero=0}
4072 | # Constant_192__702
%"_val_142__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4073 | # Cast_193__702
%"_val_143__702"<INT64,?> ⬅️ ::Cast(%"_val_142__702") {to=7}
4074 | # Constant_194__702
%"_val_144__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4075 | # Reshape_195__702
%"_val_145__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_143__702", %"_val_144__702") {allowzero=0}
4076 | # Constant_196__702
%"_val_146__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4077 | # Cast_197__702
%"_val_147__702"<INT64,?> ⬅️ ::Cast(%"_val_146__702") {to=7}
4078 | # Constant_198__702
%"_val_148__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4079 | # Reshape_199__702
%"_val_149__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_147__702", %"_val_148__702") {allowzero=0}
4080 | # Constant_200__702
%"_val_150__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4081 | # Cast_201__702
%"_val_151__702"<INT64,?> ⬅️ ::Cast(%"_val_150__702") {to=7}
4082 | # Constant_202__702
%"_val_152__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4083 | # Reshape_203__702
%"_val_153__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_151__702", %"_val_152__702") {allowzero=0}
4084 | # Slice_204__702
%"slice_106__702"<FLOAT16,[unk__1051,unk__1052,unk__1053,unk__1054]> ⬅️ ::Slice(%"slice_105__702", %"_val_141__702", %"_val_145__702", %"_val_149__702", %"_val_153__702")
4085 | # n0__737
%"dim__737"<INT64,?> ⬅️ ::Constant() {value_int=2}
4086 | # n1__737
%"dim_0__737"<INT64,?> ⬅️ ::Cast(%"dim__737") {to=7}
4087 | # n2__737
%"unsqueeze_31__702"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_106__702", %"dim_0__737")
4088 | # Constant_206__702
%"_val_156__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4089 | # Cast_207__702
%"_val_157__702"<INT64,?> ⬅️ ::Cast(%"_val_156__702") {to=7}
4090 | # Constant_208__702
%"_val_158__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4091 | # Reshape_209__702
%"_val_159__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_157__702", %"_val_158__702") {allowzero=0}
4092 | # Constant_210__702
%"_val_160__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4093 | # Cast_211__702
%"_val_161__702"<INT64,?> ⬅️ ::Cast(%"_val_160__702") {to=7}
4094 | # Constant_212__702
%"_val_162__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4095 | # Reshape_213__702
%"_val_163__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_161__702", %"_val_162__702") {allowzero=0}
4096 | # Constant_214__702
%"_val_164__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4097 | # Cast_215__702
%"_val_165__702"<INT64,?> ⬅️ ::Cast(%"_val_164__702") {to=7}
4098 | # Constant_216__702
%"_val_166__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4099 | # Reshape_217__702
%"_val_167__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_165__702", %"_val_166__702") {allowzero=0}
4100 | # Constant_218__702
%"_val_168__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4101 | # Cast_219__702
%"_val_169__702"<INT64,?> ⬅️ ::Cast(%"_val_168__702") {to=7}
4102 | # Constant_220__702
%"_val_170__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4103 | # Reshape_221__702
%"_val_171__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_169__702", %"_val_170__702") {allowzero=0}
4104 | # Slice_222__702
%"slice_107__702"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_31__702", %"_val_159__702", %"_val_163__702", %"_val_167__702", %"_val_171__702")
4105 | # Constant_223__702
%"_val_173__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4106 | # Cast_224__702
%"_val_174__702"<INT64,?> ⬅️ ::Cast(%"_val_173__702") {to=7}
4107 | # Constant_225__702
%"_val_175__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4108 | # Reshape_226__702
%"_val_176__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_174__702", %"_val_175__702") {allowzero=0}
4109 | # Constant_227__702
%"_val_177__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4110 | # Cast_228__702
%"_val_178__702"<INT64,?> ⬅️ ::Cast(%"_val_177__702") {to=7}
4111 | # Constant_229__702
%"_val_179__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4112 | # Reshape_230__702
%"_val_180__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_178__702", %"_val_179__702") {allowzero=0}
4113 | # Constant_231__702
%"_val_181__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4114 | # Cast_232__702
%"_val_182__702"<INT64,?> ⬅️ ::Cast(%"_val_181__702") {to=7}
4115 | # Constant_233__702
%"_val_183__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4116 | # Reshape_234__702
%"_val_184__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_182__702", %"_val_183__702") {allowzero=0}
4117 | # Constant_235__702
%"_val_185__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4118 | # Cast_236__702
%"_val_186__702"<INT64,?> ⬅️ ::Cast(%"_val_185__702") {to=7}
4119 | # Constant_237__702
%"_val_187__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4120 | # Reshape_238__702
%"_val_188__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_186__702", %"_val_187__702") {allowzero=0}
4121 | # Slice_239__702
%"slice_108__702"<FLOAT16,?> ⬅️ ::Slice(%"slice_107__702", %"_val_176__702", %"_val_180__702", %"_val_184__702", %"_val_188__702")
4122 | # Constant_240__702
%"_val_190__702"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
4123 | # n0__738
%"size_0__738"<INT64,[5]> ⬅️ ::Cast(%"_val_190__702") {to=7}
4124 | # n1__738
%"size_1__738"<INT64,[5]> ⬅️ ::Abs(%"size_0__738")
4125 | # n2__738
%"expand_14__702"<FLOAT16,?> ⬅️ ::Expand(%"slice_108__702", %"size_1__738")
4126 | # n0__739
%"clone_14__702"<FLOAT16,?> ⬅️ ::Identity(%"expand_14__702")
4127 | # Constant_243__702
%"_val_193__702"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
4128 | # n0__740
%"size_0__740"<INT64,[4]> ⬅️ ::Cast(%"_val_193__702") {to=7}
4129 | # n1__740
%"view_150__702"<FLOAT16,[unk__1055,unk__1056,unk__1057,unk__1058]> ⬅️ ::Reshape(%"clone_14__702", %"size_0__740")
4130 | # Constant_245__702
%"_val_195__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4131 | # Cast_246__702
%"_val_196__702"<INT64,?> ⬅️ ::Cast(%"_val_195__702") {to=7}
4132 | # Constant_247__702
%"_val_197__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4133 | # Reshape_248__702
%"_val_198__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_196__702", %"_val_197__702") {allowzero=0}
4134 | # Constant_249__702
%"_val_199__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4135 | # Cast_250__702
%"_val_200__702"<INT64,?> ⬅️ ::Cast(%"_val_199__702") {to=7}
4136 | # Constant_251__702
%"_val_201__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4137 | # Reshape_252__702
%"_val_202__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_200__702", %"_val_201__702") {allowzero=0}
4138 | # Constant_253__702
%"_val_203__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4139 | # Cast_254__702
%"_val_204__702"<INT64,?> ⬅️ ::Cast(%"_val_203__702") {to=7}
4140 | # Constant_255__702
%"_val_205__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4141 | # Reshape_256__702
%"_val_206__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_204__702", %"_val_205__702") {allowzero=0}
4142 | # Constant_257__702
%"_val_207__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4143 | # Cast_258__702
%"_val_208__702"<INT64,?> ⬅️ ::Cast(%"_val_207__702") {to=7}
4144 | # Constant_259__702
%"_val_209__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4145 | # Reshape_260__702
%"_val_210__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_208__702", %"_val_209__702") {allowzero=0}
4146 | # Slice_261__702
%"slice_109__702"<FLOAT16,[unk__1059,unk__1060,unk__1061,unk__1062]> ⬅️ ::Slice(%"model_1_14", %"_val_198__702", %"_val_202__702", %"_val_206__702", %"_val_210__702")
4147 | # Constant_262__702
%"_val_212__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4148 | # Cast_263__702
%"_val_213__702"<INT64,?> ⬅️ ::Cast(%"_val_212__702") {to=7}
4149 | # Constant_264__702
%"_val_214__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4150 | # Reshape_265__702
%"_val_215__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_213__702", %"_val_214__702") {allowzero=0}
4151 | # Constant_266__702
%"_val_216__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4152 | # Cast_267__702
%"_val_217__702"<INT64,?> ⬅️ ::Cast(%"_val_216__702") {to=7}
4153 | # Constant_268__702
%"_val_218__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4154 | # Reshape_269__702
%"_val_219__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_217__702", %"_val_218__702") {allowzero=0}
4155 | # Constant_270__702
%"_val_220__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4156 | # Cast_271__702
%"_val_221__702"<INT64,?> ⬅️ ::Cast(%"_val_220__702") {to=7}
4157 | # Constant_272__702
%"_val_222__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4158 | # Reshape_273__702
%"_val_223__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_221__702", %"_val_222__702") {allowzero=0}
4159 | # Constant_274__702
%"_val_224__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4160 | # Cast_275__702
%"_val_225__702"<INT64,?> ⬅️ ::Cast(%"_val_224__702") {to=7}
4161 | # Constant_276__702
%"_val_226__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4162 | # Reshape_277__702
%"_val_227__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_225__702", %"_val_226__702") {allowzero=0}
4163 | # Slice_278__702
%"slice_110__702"<FLOAT16,[unk__1063,unk__1064,unk__1065,unk__1066]> ⬅️ ::Slice(%"slice_109__702", %"_val_215__702", %"_val_219__702", %"_val_223__702", %"_val_227__702")
4164 | # n0__741
%"dim__741"<INT64,?> ⬅️ ::Constant() {value_int=2}
4165 | # n1__741
%"dim_0__741"<INT64,?> ⬅️ ::Cast(%"dim__741") {to=7}
4166 | # n2__741
%"unsqueeze_32__702"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_110__702", %"dim_0__741")
4167 | # Constant_280__702
%"_val_230__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4168 | # Cast_281__702
%"_val_231__702"<INT64,?> ⬅️ ::Cast(%"_val_230__702") {to=7}
4169 | # Constant_282__702
%"_val_232__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4170 | # Reshape_283__702
%"_val_233__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_231__702", %"_val_232__702") {allowzero=0}
4171 | # Constant_284__702
%"_val_234__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4172 | # Cast_285__702
%"_val_235__702"<INT64,?> ⬅️ ::Cast(%"_val_234__702") {to=7}
4173 | # Constant_286__702
%"_val_236__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4174 | # Reshape_287__702
%"_val_237__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_235__702", %"_val_236__702") {allowzero=0}
4175 | # Constant_288__702
%"_val_238__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4176 | # Cast_289__702
%"_val_239__702"<INT64,?> ⬅️ ::Cast(%"_val_238__702") {to=7}
4177 | # Constant_290__702
%"_val_240__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4178 | # Reshape_291__702
%"_val_241__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_239__702", %"_val_240__702") {allowzero=0}
4179 | # Constant_292__702
%"_val_242__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4180 | # Cast_293__702
%"_val_243__702"<INT64,?> ⬅️ ::Cast(%"_val_242__702") {to=7}
4181 | # Constant_294__702
%"_val_244__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4182 | # Reshape_295__702
%"_val_245__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_243__702", %"_val_244__702") {allowzero=0}
4183 | # Slice_296__702
%"slice_111__702"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_32__702", %"_val_233__702", %"_val_237__702", %"_val_241__702", %"_val_245__702")
4184 | # Constant_297__702
%"_val_247__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4185 | # Cast_298__702
%"_val_248__702"<INT64,?> ⬅️ ::Cast(%"_val_247__702") {to=7}
4186 | # Constant_299__702
%"_val_249__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4187 | # Reshape_300__702
%"_val_250__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_248__702", %"_val_249__702") {allowzero=0}
4188 | # Constant_301__702
%"_val_251__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4189 | # Cast_302__702
%"_val_252__702"<INT64,?> ⬅️ ::Cast(%"_val_251__702") {to=7}
4190 | # Constant_303__702
%"_val_253__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4191 | # Reshape_304__702
%"_val_254__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_252__702", %"_val_253__702") {allowzero=0}
4192 | # Constant_305__702
%"_val_255__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4193 | # Cast_306__702
%"_val_256__702"<INT64,?> ⬅️ ::Cast(%"_val_255__702") {to=7}
4194 | # Constant_307__702
%"_val_257__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4195 | # Reshape_308__702
%"_val_258__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_256__702", %"_val_257__702") {allowzero=0}
4196 | # Constant_309__702
%"_val_259__702"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4197 | # Cast_310__702
%"_val_260__702"<INT64,?> ⬅️ ::Cast(%"_val_259__702") {to=7}
4198 | # Constant_311__702
%"_val_261__702"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4199 | # Reshape_312__702
%"_val_262__702"<INT64,[1]> ⬅️ ::Reshape(%"_val_260__702", %"_val_261__702") {allowzero=0}
4200 | # Slice_313__702
%"slice_112__702"<FLOAT16,?> ⬅️ ::Slice(%"slice_111__702", %"_val_250__702", %"_val_254__702", %"_val_258__702", %"_val_262__702")
4201 | # Constant_314__702
%"_val_264__702"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
4202 | # n0__742
%"size_0__742"<INT64,[5]> ⬅️ ::Cast(%"_val_264__702") {to=7}
4203 | # n1__742
%"size_1__742"<INT64,[5]> ⬅️ ::Abs(%"size_0__742")
4204 | # n2__742
%"expand_15__702"<FLOAT16,?> ⬅️ ::Expand(%"slice_112__702", %"size_1__742")
4205 | # n0__743
%"clone_15__702"<FLOAT16,?> ⬅️ ::Identity(%"expand_15__702")
4206 | # Constant_317__702
%"_val_267__702"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
4207 | # n0__744
%"size_0__744"<INT64,[4]> ⬅️ ::Cast(%"_val_267__702") {to=7}
4208 | # n1__744
%"view_151__702"<FLOAT16,[unk__1067,unk__1068,unk__1069,unk__1070]> ⬅️ ::Reshape(%"clone_15__702", %"size_0__744")
4209 | # n0__745
%"tmp__745"<INT64,[unk__1071]> ⬅️ ::Shape(%"add_43__702")
4210 | # n1__745
%"int64_m1__745"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_m1')}
4211 | # n2__745
%"tmp_subscripted__745"<INT64,?> ⬅️ ::Gather(%"tmp__745", %"int64_m1__745") {axis=0}
4212 | # n3__745
%"embedding_size__745"<FLOAT16,?> ⬅️ ::CastLike(%"tmp_subscripted__745", %"add_43__702")
4213 | # n4__745
%"const__745"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
4214 | # n5__745
%"tmp_0__745"<FLOAT16,?> ⬅️ ::Sqrt(%"embedding_size__745")
4215 | # n6__745
%"const_cast__745"<FLOAT16,?> ⬅️ ::CastLike(%"const__745", %"tmp_0__745")
4216 | # n7__745
%"_val_269__702"<FLOAT16,?> ⬅️ ::Div(%"const_cast__745", %"tmp_0__745")
4217 | # CastLike_320__702
%"_val_270__702"<FLOAT16,?> ⬅️ ::CastLike(%"_val_269__702", %"add_43__702")
4218 | # n0__746
%"tmp__746"<INT64,[unk__1072]> ⬅️ ::Shape(%"add_43__702")
4219 | # n1__746
%"int64_0_1d__746"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
4220 | # n2__746
%"int64_1_1d__746"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
4221 | # n3__746
%"int64_m2_1d__746"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
4222 | # n4__746
%"int64_m1_1d__746"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
4223 | # n5__746
%"target_length__746"<INT64,[unk__1073]> ⬅️ ::Slice(%"tmp__746", %"int64_m2_1d__746", %"int64_m1_1d__746", %"int64_0_1d__746", %"int64_1_1d__746")
4224 | # n6__746
%"tmp_0__746"<INT64,[4]> ⬅️ ::Shape(%"view_150__702")
4225 | # n7__746
%"int64_0_1d_1__746"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
4226 | # n8__746
%"int64_1_1d_2__746"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
4227 | # n9__746
%"int64_m2_1d_3__746"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_3')}
4228 | # n10__746
%"int64_m1_1d_4__746"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_4')}
4229 | # n11__746
%"source_length__746"<INT64,[1]> ⬅️ ::Slice(%"tmp_0__746", %"int64_m2_1d_3__746", %"int64_m1_1d_4__746", %"int64_0_1d_1__746", %"int64_1_1d_2__746")
4230 | # n12__746
%"size__746"<INT64,[unk__1074]> ⬅️ ::Concat(%"target_length__746", %"source_length__746") {axis=0}
4231 | # n13__746
%"const__746"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
4232 | # n14__746
%"attn_mask__746"<FLOAT,?> ⬅️ ::Expand(%"const__746", %"size__746")
4233 | # n15__746
%"attn_mask_5__746"<FLOAT,?> ⬅️ ::Trilu(%"attn_mask__746") {upper=0}
4234 | # n16__746
%"const_6__746"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_6')}
4235 | # n17__746
%"const_6_cast__746"<FLOAT,?> ⬅️ ::CastLike(%"const_6__746", %"attn_mask_5__746")
4236 | # n18__746
%"tmp_7__746"<BOOL,?> ⬅️ ::Equal(%"attn_mask_5__746", %"const_6_cast__746")
4237 | # n19__746
%"tmp_8__746"<FLOAT,?> ⬅️ ::Constant() {value_float=-inf}
4238 | # n20__746
%"const_9__746"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
4239 | # n21__746
%"const_9_cast__746"<FLOAT,?> ⬅️ ::CastLike(%"const_9__746", %"tmp_8__746")
4240 | # n22__746
%"attn_mask_10__746"<FLOAT,?> ⬅️ ::Where(%"tmp_7__746", %"tmp_8__746", %"const_9_cast__746")
4241 | # n23__746
%"_val_271__702"<FLOAT16,?> ⬅️ ::CastLike(%"attn_mask_10__746", %"add_43__702")
4242 | # n0__747
%"key_shape__747"<INT64,[4]> ⬅️ ::Shape(%"view_150__702")
4243 | # n1__747
%"int64_0_1d__747"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
4244 | # n2__747
%"int64_1_1d__747"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
4245 | # n3__747
%"int64_m1_1d__747"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
4246 | # n4__747
%"int64_9223372036854775807_1d__747"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_9223372036854775807_1d')}
4247 | # n5__747
%"key_last_dim__747"<INT64,[1]> ⬅️ ::Slice(%"key_shape__747", %"int64_m1_1d__747", %"int64_9223372036854775807_1d__747", %"int64_0_1d__747", %"int64_1_1d__747")
4248 | # n6__747
%"int64_0_1d_0__747"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_0')}
4249 | # n7__747
%"int64_1_1d_1__747"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_1')}
4250 | # n8__747
%"int64_m2_1d__747"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
4251 | # n9__747
%"int64_m1_1d_2__747"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_2')}
4252 | # n10__747
%"key_second_last_dim__747"<INT64,[1]> ⬅️ ::Slice(%"key_shape__747", %"int64_m2_1d__747", %"int64_m1_1d_2__747", %"int64_0_1d_0__747", %"int64_1_1d_1__747")
4253 | # n11__747
%"int64_0_1d_3__747"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
4254 | # n12__747
%"int64_1_1d_4__747"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
4255 | # n13__747
%"int64_m2_1d_5__747"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_5')}
4256 | # n14__747
%"key_first_dims__747"<INT64,[2]> ⬅️ ::Slice(%"key_shape__747", %"int64_0_1d_3__747", %"int64_m2_1d_5__747", %"int64_0_1d_3__747", %"int64_1_1d_4__747")
4257 | # n15__747
%"tmp__747"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4258 | # n16__747
%"key_squeezed_shape__747"<INT64,[3]> ⬅️ ::Concat(%"tmp__747", %"key_second_last_dim__747", %"key_last_dim__747") {axis=0}
4259 | # n17__747
%"key_squeezed__747"<FLOAT16,[unk__1075,unk__1076,unk__1077]> ⬅️ ::Reshape(%"view_150__702", %"key_squeezed_shape__747")
4260 | # n18__747
%"key_squeezed_transposed__747"<FLOAT16,[unk__1075,unk__1077,unk__1076]> ⬅️ ::Transpose(%"key_squeezed__747") {perm=[0, 2, 1]}
4261 | # n19__747
%"key_transposed_shape__747"<INT64,[4]> ⬅️ ::Concat(%"key_first_dims__747", %"key_last_dim__747", %"key_second_last_dim__747") {axis=0}
4262 | # n20__747
%"key_transposed__747"<FLOAT16,[unk__1078,unk__1079,unk__1080,unk__1081]> ⬅️ ::Reshape(%"key_squeezed_transposed__747", %"key_transposed_shape__747")
4263 | # n21__747
%"tmp_6__747"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__702")
4264 | # n22__747
%"query_scaled__747"<FLOAT16,?> ⬅️ ::Mul(%"add_43__702", %"tmp_6__747")
4265 | # n23__747
%"tmp_7__747"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__702")
4266 | # n24__747
%"key_transposed_scaled__747"<FLOAT16,[unk__1078,unk__1079,unk__1080,unk__1081]> ⬅️ ::Mul(%"key_transposed__747", %"tmp_7__747")
4267 | # n25__747
%"tmp_8__747"<FLOAT16,?> ⬅️ ::MatMul(%"query_scaled__747", %"key_transposed_scaled__747")
4268 | # n26__747
%"tmp_9__747"<FLOAT16,?> ⬅️ ::Add(%"tmp_8__747", %"_val_271__702")
4269 | # n27__747
%"attn_weight__747"<FLOAT16,?> ⬅️ ::Softmax(%"tmp_9__747") {axis=-1}
4270 | # n28__747
%"dropout_p__747"<FLOAT,?> ⬅️ ::Constant() {value_float=0.0}
4271 | # n29__747
%"attn_weight_10__747"<FLOAT16,?>, %"___747"<BOOL,?> ⬅️ ::Dropout(%"attn_weight__747", %"dropout_p__747")
4272 | # n30__747
%"_scaled_dot_product_efficient_attention_7__702"<FLOAT16,?> ⬅️ ::MatMul(%"attn_weight_10__747", %"view_151__702")
4273 | # n0__748
%"query_0__748"<FLOAT16,?> ⬅️ ::Transpose(%"add_43__702") {perm=[0, 2, 1, 3]}
4274 | # n1__748
%"query_shape__748"<INT64,[unk__1082]> ⬅️ ::Shape(%"query_0__748")
4275 | # n2__748
%"int64_0_1d__748"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
4276 | # n3__748
%"int64_1_1d__748"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
4277 | # n4__748
%"query_first_dims__748"<INT64,[unk__1083]> ⬅️ ::Slice(%"query_shape__748", %"int64_0_1d__748", %"int64_1_1d__748", %"int64_0_1d__748", %"int64_1_1d__748")
4278 | # n5__748
%"int64_0_1d_1__748"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
4279 | # n6__748
%"int64_1_1d_2__748"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
4280 | # n7__748
%"int64_2_1d__748"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_2_1d')}
4281 | # n8__748
%"query_second_dims__748"<INT64,[unk__1084]> ⬅️ ::Slice(%"query_shape__748", %"int64_1_1d_2__748", %"int64_2_1d__748", %"int64_0_1d_1__748", %"int64_1_1d_2__748")
4282 | # n9__748
%"int64_0_1d_3__748"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
4283 | # n10__748
%"int64_1_1d_4__748"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
4284 | # n11__748
%"int64_m2_1d__748"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
4285 | # n12__748
%"int64_m1_1d__748"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
4286 | # n13__748
%"num_heads__748"<INT64,[unk__1085]> ⬅️ ::Slice(%"query_shape__748", %"int64_m2_1d__748", %"int64_m1_1d__748", %"int64_0_1d_3__748", %"int64_1_1d_4__748")
4287 | # n14__748
%"compute_log_sumexp__748"<INT64,?> ⬅️ ::Constant() {value_int=0}
4288 | # n15__748
%"compute_log_sumexp_as_bool__748"<BOOL,?> ⬅️ ::Cast(%"compute_log_sumexp__748") {to=9}
4289 | # n16__748
%"_scaled_dot_product_efficient_attention_7_1__702"<FLOAT,?> ⬅️ ::If(%"compute_log_sumexp_as_bool__748") {then_branch=
graph(
name=thenGraph_14,
inputs=(
),
outputs=(
%"logsum_exp__748"<FLOAT,?>
),
) {
0 | # n0__748_313
%"tmp__748"<FLOAT,[unk__1084]> ⬅️ ::Cast(%"query_second_dims__748") {to=1}
1 | # n1__748_314
%"const__748"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2 | # n2__748_315
%"const_cast__748"<FLOAT,?> ⬅️ ::CastLike(%"const__748", %"tmp__748")
3 | # n3__748_316
%"tmp_5__748"<FLOAT,[unk__1084]> ⬅️ ::Div(%"tmp__748", %"const_cast__748")
4 | # n4__748_317
%"tmp_6__748"<FLOAT,[unk__1084]> ⬅️ ::Ceil(%"tmp_5__748")
5 | # n5__748_318
%"const_7__748"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_7')}
6 | # n6__748_319
%"const_7_cast__748"<FLOAT,?> ⬅️ ::CastLike(%"const_7__748", %"tmp_6__748")
7 | # n7__748_320
%"tmp_8__748"<FLOAT,[unk__1084]> ⬅️ ::Mul(%"tmp_6__748", %"const_7_cast__748")
8 | # n8__748_321
%"logsumexp_dim__748"<INT64,[unk__1084]> ⬅️ ::Cast(%"tmp_8__748") {to=7}
9 | # n9__748_322
%"const_9__748"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
10 | # n10__748_323
%"tmp_10__748"<INT64,[unk__1086]> ⬅️ ::Concat(%"query_first_dims__748", %"num_heads__748", %"logsumexp_dim__748") {axis=0}
11 | # n11__748_324
%"logsum_exp__748"<FLOAT,?> ⬅️ ::Expand(%"const_9__748", %"tmp_10__748")
return %"logsum_exp__748"<FLOAT,?>
}, else_branch=
graph(
name=elseGraph_14,
inputs=(
),
outputs=(
%"logsum_exp_14__748"<FLOAT,?>
),
) {
0 | # n0__748_325
%"const_11__748"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_11')}
1 | # n1__748_326
%"int64_0_1d_12__748"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_12')}
2 | # n2__748_327
%"int64_0_1d_12_cast__748"<INT64,[1]> ⬅️ ::CastLike(%"int64_0_1d_12__748", %"num_heads__748")
3 | # n3__748_328
%"tmp_13__748"<INT64,[unk__1087]> ⬅️ ::Concat(%"query_first_dims__748", %"num_heads__748", %"int64_0_1d_12_cast__748") {axis=0}
4 | # n4__748_329
%"logsum_exp_14__748"<FLOAT,?> ⬅️ ::Expand(%"const_11__748", %"tmp_13__748")
return %"logsum_exp_14__748"<FLOAT,?>
}}
4290 | # n17__748
%"tmp_16__748"<INT64,[0]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[0]>('Empty_INTS')}
4291 | # n18__748
%"tmp_17__748"<FLOAT,?> ⬅️ ::ConstantOfShape(%"tmp_16__748")
4292 | # n19__748
%"_scaled_dot_product_efficient_attention_7_3__702"<INT64,?> ⬅️ ::Cast(%"tmp_17__748") {to=7}
4293 | # Transpose_324__702
%"transpose_31__702"<FLOAT16,?> ⬅️ ::Transpose(%"_scaled_dot_product_efficient_attention_7__702") {perm=[0, 2, 1, 3]}
4294 | # Constant_325__702
%"_val_276__702"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4295 | # n0__749
%"size_0__749"<INT64,[3]> ⬅️ ::Cast(%"_val_276__702") {to=7}
4296 | # n1__749
%"view_152__702"<FLOAT16,[unk__1088,unk__1089,unk__1090]> ⬅️ ::Reshape(%"transpose_31__702", %"size_0__749")
4297 | # n0__752
%"tmp__752"<INT64,[2]> ⬅️ ::Shape(%"model.layers.7.self_attn.o_proj.weight")
4298 | # n1__752
%"rank__751"<INT64,?> ⬅️ ::Size(%"tmp__752")
4299 | # n1__751
%"int64_2__751"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4300 | # n2__751
%"int64_2_cast__751"<INT64,?> ⬅️ ::CastLike(%"int64_2__751", %"rank__751")
4301 | # n3__751
%"cond__751"<BOOL,?> ⬅️ ::Equal(%"rank__751", %"int64_2_cast__751")
4302 | # n4__751
%"t_52__750"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__751") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__751"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__751_330
%"result__751"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.7.self_attn.o_proj.weight") {perm=[1, 0]}
return %"result__751"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__751"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__751_331
%"result_0__751"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.7.self_attn.o_proj.weight")
return %"result_0__751"<FLOAT16,[4096,4096]>
}}
4303 | # Constant_3__750
%"_val_3__750"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4304 | # n0__753
%"size_0__753"<INT64,[2]> ⬅️ ::Cast(%"_val_3__750") {to=7}
4305 | # n1__753
%"view_153__750"<FLOAT16,[unk__1091,unk__1092]> ⬅️ ::Reshape(%"view_152__702", %"size_0__753")
4306 | # n0__754
%"mm_52__750"<FLOAT16,[unk__1091,4096]> ⬅️ ::MatMul(%"view_153__750", %"t_52__750")
4307 | # Constant_6__750
%"_val_6__750"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4308 | # n0__755
%"size_0__755"<INT64,[3]> ⬅️ ::Cast(%"_val_6__750") {to=7}
4309 | # n1__755
%"model_layers_7_self_attn_1_2__692"<FLOAT16,[unk__1093,unk__1094,unk__1095]> ⬅️ ::Reshape(%"mm_52__750", %"size_0__755")
4310 | # n0__756
%"alpha__756"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
4311 | # n1__756
%"alpha_0__756"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__756", %"model_layers_7_self_attn_1_2__692")
4312 | # n2__756
%"other_1__756"<FLOAT16,[unk__1093,unk__1094,unk__1095]> ⬅️ ::Mul(%"model_layers_7_self_attn_1_2__692", %"alpha_0__756")
4313 | # n3__756
%"add_45__692"<FLOAT16,[unk__1096,128,4096]> ⬅️ ::Add(%"model_layers_6_1_2__1", %"other_1__756")
4314 | # Cast_3__757
%"_to_copy_44__757"<FLOAT,[unk__1096,128,4096]> ⬅️ ::Cast(%"add_45__692") {to=1}
4315 | # Constant_4__757
%"_val_2__757"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4316 | # Cast_5__757
%"scalar_tensor_default_15__757"<FLOAT,?> ⬅️ ::Cast(%"_val_2__757") {to=1}
4317 | # n0__758
%"pow_16__757"<FLOAT,[unk__1096,128,4096]> ⬅️ ::Pow(%"_to_copy_44__757", %"scalar_tensor_default_15__757")
4318 | # Constant_7__757
%"_val_5__757"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
4319 | # n0__760
%"tmp__760"<INT64,[3]> ⬅️ ::Shape(%"pow_16__757")
4320 | # n1__760
%"tmp_0__760"<INT64,?> ⬅️ ::Size(%"tmp__760")
4321 | # n2__760
%"tmp_1__760"<INT64,?> ⬅️ ::Constant() {value_int=0}
4322 | # n3__760
%"cond__759"<BOOL,?> ⬅️ ::Equal(%"tmp_0__760", %"tmp_1__760")
4323 | # n1__759
%"mean_15__757"<FLOAT,?> ⬅️ ::If(%"cond__759") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__759"<FLOAT,[unk__1096,128,4096]>
),
) {
0 | # n0__759_332
%"result__759"<FLOAT,[unk__1096,128,4096]> ⬅️ ::Identity(%"pow_16__757")
return %"result__759"<FLOAT,[unk__1096,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__759"<FLOAT,?>
),
) {
0 | # n0__761
%"tmp__761"<INT64,[1]> ⬅️ ::Shape(%"_val_5__757")
1 | # n1__761
%"tmp_0__761"<INT64,?> ⬅️ ::Size(%"tmp__761")
2 | # n2__761
%"tmp_1__761"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__761
%"cond_0__759"<BOOL,?> ⬅️ ::Equal(%"tmp_0__761", %"tmp_1__761")
4 | # n1__759_334
%"dim_3__759"<INT64,?> ⬅️ ::If(%"cond_0__759") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__759"<INT64,[1,1]>
),
) {
0 | # n0__759_335
%"int64_0__759"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__759_336
%"dim_1__759"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__757", %"int64_0__759")
return %"dim_1__759"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__759"<INT64,[1]>
),
) {
0 | # n0__759_337
%"dim_2__759"<INT64,[1]> ⬅️ ::Identity(%"_val_5__757")
return %"dim_2__759"<INT64,[1]>
}}
5 | # n2__759
%"result_4__759"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_16__757", %"dim_3__759") {keepdims=1}
return %"result_4__759"<FLOAT,?>
}}
4324 | # Constant_9__757
%"_val_7__757"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
4325 | # n0__762
%"alpha__762"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
4326 | # n1__762
%"alpha_0__762"<FLOAT,?> ⬅️ ::CastLike(%"alpha__762", %"_val_7__757")
4327 | # n2__762
%"other_1__762"<FLOAT,?> ⬅️ ::Mul(%"_val_7__757", %"alpha_0__762")
4328 | # n3__762
%"add_46__757"<FLOAT,?> ⬅️ ::Add(%"mean_15__757", %"other_1__762")
4329 | # n0__763
%"tmp__763"<FLOAT,?> ⬅️ ::Sqrt(%"add_46__757")
4330 | # n1__763
%"rsqrt_15__757"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__763")
4331 | # n0__764
%"mul_76__757"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_44__757", %"rsqrt_15__757")
4332 | # Cast_13__757
%"_to_copy_45__757"<FLOAT16,?> ⬅️ ::Cast(%"mul_76__757") {to=10}
4333 | # n0__765
%"model_layers_7_post_attention_layernorm_1__692"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.7.post_attention_layernorm.weight", %"_to_copy_45__757")
4334 | # n0__769
%"tmp__769"<INT64,[2]> ⬅️ ::Shape(%"model.layers.7.mlp.gate_proj.weight")
4335 | # n1__769
%"rank__768"<INT64,?> ⬅️ ::Size(%"tmp__769")
4336 | # n1__768
%"int64_2__768"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4337 | # n2__768
%"int64_2_cast__768"<INT64,?> ⬅️ ::CastLike(%"int64_2__768", %"rank__768")
4338 | # n3__768
%"cond__768"<BOOL,?> ⬅️ ::Equal(%"rank__768", %"int64_2_cast__768")
4339 | # n4__768
%"t_53__767"<FLOAT16,[unk__1097,unk__1098]> ⬅️ ::If(%"cond__768") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__768"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__768_338
%"result__768"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.7.mlp.gate_proj.weight") {perm=[1, 0]}
return %"result__768"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__768"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__768_339
%"result_0__768"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.7.mlp.gate_proj.weight")
return %"result_0__768"<FLOAT16,[14336,4096]>
}}
4340 | # Constant_3__767
%"_val_3__767"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4341 | # n0__770
%"size_0__770"<INT64,[2]> ⬅️ ::Cast(%"_val_3__767") {to=7}
4342 | # n1__770
%"view_155__767"<FLOAT16,[unk__1099,unk__1100]> ⬅️ ::Reshape(%"model_layers_7_post_attention_layernorm_1__692", %"size_0__770")
4343 | # n0__771
%"mm_53__767"<FLOAT16,[unk__1099,unk__1098]> ⬅️ ::MatMul(%"view_155__767", %"t_53__767")
4344 | # Constant_6__767
%"_val_6__767"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4345 | # n0__772
%"size_0__772"<INT64,[3]> ⬅️ ::Cast(%"_val_6__767") {to=7}
4346 | # n1__772
%"model_layers_7_mlp_gate_proj_1__766"<FLOAT16,[unk__1101,unk__1102,unk__1103]> ⬅️ ::Reshape(%"mm_53__767", %"size_0__772")
4347 | # Cast_0__773
%"_to_copy_46__773"<FLOAT,[unk__1101,unk__1102,unk__1103]> ⬅️ ::Cast(%"model_layers_7_mlp_gate_proj_1__766") {to=1}
4348 | # n0__774
%"sigmoid_7__773"<FLOAT,[unk__1101,unk__1102,unk__1103]> ⬅️ ::Sigmoid(%"_to_copy_46__773")
4349 | # n0__775
%"mul_78__773"<FLOAT,[unk__1101,unk__1102,unk__1103]> ⬅️ ::Mul(%"_to_copy_46__773", %"sigmoid_7__773")
4350 | # Cast_3__773
%"model_layers_7_mlp_act_fn_1__766"<FLOAT16,[unk__1101,unk__1102,unk__1103]> ⬅️ ::Cast(%"mul_78__773") {to=10}
4351 | # n0__778
%"tmp__778"<INT64,[2]> ⬅️ ::Shape(%"model.layers.7.mlp.up_proj.weight")
4352 | # n1__778
%"rank__777"<INT64,?> ⬅️ ::Size(%"tmp__778")
4353 | # n1__777
%"int64_2__777"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4354 | # n2__777
%"int64_2_cast__777"<INT64,?> ⬅️ ::CastLike(%"int64_2__777", %"rank__777")
4355 | # n3__777
%"cond__777"<BOOL,?> ⬅️ ::Equal(%"rank__777", %"int64_2_cast__777")
4356 | # n4__777
%"t_54__776"<FLOAT16,[unk__1104,unk__1105]> ⬅️ ::If(%"cond__777") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__777"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__777_340
%"result__777"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.7.mlp.up_proj.weight") {perm=[1, 0]}
return %"result__777"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__777"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__777_341
%"result_0__777"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.7.mlp.up_proj.weight")
return %"result_0__777"<FLOAT16,[14336,4096]>
}}
4357 | # Constant_3__776
%"_val_3__776"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4358 | # n0__779
%"size_0__779"<INT64,[2]> ⬅️ ::Cast(%"_val_3__776") {to=7}
4359 | # n1__779
%"view_157__776"<FLOAT16,[unk__1106,unk__1107]> ⬅️ ::Reshape(%"model_layers_7_post_attention_layernorm_1__692", %"size_0__779")
4360 | # n0__780
%"mm_54__776"<FLOAT16,[unk__1106,unk__1105]> ⬅️ ::MatMul(%"view_157__776", %"t_54__776")
4361 | # Constant_6__776
%"_val_6__776"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4362 | # n0__781
%"size_0__781"<INT64,[3]> ⬅️ ::Cast(%"_val_6__776") {to=7}
4363 | # n1__781
%"model_layers_7_mlp_up_proj_1__766"<FLOAT16,[unk__1108,unk__1109,unk__1110]> ⬅️ ::Reshape(%"mm_54__776", %"size_0__781")
4364 | # n0__782
%"mul_79__766"<FLOAT16,[unk__1111,unk__1112,unk__1113]> ⬅️ ::Mul(%"model_layers_7_mlp_act_fn_1__766", %"model_layers_7_mlp_up_proj_1__766")
4365 | # n0__785
%"tmp__785"<INT64,[2]> ⬅️ ::Shape(%"model.layers.7.mlp.down_proj.weight")
4366 | # n1__785
%"rank__784"<INT64,?> ⬅️ ::Size(%"tmp__785")
4367 | # n1__784
%"int64_2__784"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4368 | # n2__784
%"int64_2_cast__784"<INT64,?> ⬅️ ::CastLike(%"int64_2__784", %"rank__784")
4369 | # n3__784
%"cond__784"<BOOL,?> ⬅️ ::Equal(%"rank__784", %"int64_2_cast__784")
4370 | # n4__784
%"t_55__783"<FLOAT16,[unk__1114,unk__1115]> ⬅️ ::If(%"cond__784") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__784"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__784_342
%"result__784"<FLOAT16,[14336,4096]> ⬅️ ::Transpose(%"model.layers.7.mlp.down_proj.weight") {perm=[1, 0]}
return %"result__784"<FLOAT16,[14336,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__784"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__784_343
%"result_0__784"<FLOAT16,[4096,14336]> ⬅️ ::Identity(%"model.layers.7.mlp.down_proj.weight")
return %"result_0__784"<FLOAT16,[4096,14336]>
}}
4371 | # Constant_3__783
%"_val_3__783"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4372 | # n0__786
%"size_0__786"<INT64,[2]> ⬅️ ::Cast(%"_val_3__783") {to=7}
4373 | # n1__786
%"view_159__783"<FLOAT16,[unk__1116,unk__1117]> ⬅️ ::Reshape(%"mul_79__766", %"size_0__786")
4374 | # n0__787
%"mm_55__783"<FLOAT16,[unk__1116,unk__1115]> ⬅️ ::MatMul(%"view_159__783", %"t_55__783")
4375 | # Constant_6__783
%"_val_6__783"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4376 | # n0__788
%"size_0__788"<INT64,[3]> ⬅️ ::Cast(%"_val_6__783") {to=7}
4377 | # n1__788
%"model_layers_7_mlp_1__692"<FLOAT16,[unk__1118,unk__1119,unk__1120]> ⬅️ ::Reshape(%"mm_55__783", %"size_0__788")
4378 | # n0__789
%"alpha__789"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
4379 | # n1__789
%"alpha_0__789"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__789", %"model_layers_7_mlp_1__692")
4380 | # n2__789
%"other_1__789"<FLOAT16,[unk__1118,unk__1119,unk__1120]> ⬅️ ::Mul(%"model_layers_7_mlp_1__692", %"alpha_0__789")
4381 | # n3__789
%"model_layers_7_1_2__1"<FLOAT16,[unk__1121,128,4096]> ⬅️ ::Add(%"add_45__692", %"other_1__789")
4382 | # Cast_3__791
%"_to_copy_48__791"<FLOAT,[unk__1121,128,4096]> ⬅️ ::Cast(%"model_layers_7_1_2__1") {to=1}
4383 | # Constant_4__791
%"_val_2__791"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4384 | # Cast_5__791
%"scalar_tensor_default_16__791"<FLOAT,?> ⬅️ ::Cast(%"_val_2__791") {to=1}
4385 | # n0__792
%"pow_17__791"<FLOAT,[unk__1121,128,4096]> ⬅️ ::Pow(%"_to_copy_48__791", %"scalar_tensor_default_16__791")
4386 | # Constant_7__791
%"_val_5__791"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
4387 | # n0__794
%"tmp__794"<INT64,[3]> ⬅️ ::Shape(%"pow_17__791")
4388 | # n1__794
%"tmp_0__794"<INT64,?> ⬅️ ::Size(%"tmp__794")
4389 | # n2__794
%"tmp_1__794"<INT64,?> ⬅️ ::Constant() {value_int=0}
4390 | # n3__794
%"cond__793"<BOOL,?> ⬅️ ::Equal(%"tmp_0__794", %"tmp_1__794")
4391 | # n1__793
%"mean_16__791"<FLOAT,?> ⬅️ ::If(%"cond__793") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__793"<FLOAT,[unk__1121,128,4096]>
),
) {
0 | # n0__793_344
%"result__793"<FLOAT,[unk__1121,128,4096]> ⬅️ ::Identity(%"pow_17__791")
return %"result__793"<FLOAT,[unk__1121,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__793"<FLOAT,?>
),
) {
0 | # n0__795
%"tmp__795"<INT64,[1]> ⬅️ ::Shape(%"_val_5__791")
1 | # n1__795
%"tmp_0__795"<INT64,?> ⬅️ ::Size(%"tmp__795")
2 | # n2__795
%"tmp_1__795"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__795
%"cond_0__793"<BOOL,?> ⬅️ ::Equal(%"tmp_0__795", %"tmp_1__795")
4 | # n1__793_346
%"dim_3__793"<INT64,?> ⬅️ ::If(%"cond_0__793") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__793"<INT64,[1,1]>
),
) {
0 | # n0__793_347
%"int64_0__793"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__793_348
%"dim_1__793"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__791", %"int64_0__793")
return %"dim_1__793"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__793"<INT64,[1]>
),
) {
0 | # n0__793_349
%"dim_2__793"<INT64,[1]> ⬅️ ::Identity(%"_val_5__791")
return %"dim_2__793"<INT64,[1]>
}}
5 | # n2__793
%"result_4__793"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_17__791", %"dim_3__793") {keepdims=1}
return %"result_4__793"<FLOAT,?>
}}
4392 | # Constant_9__791
%"_val_7__791"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
4393 | # n0__796
%"alpha__796"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
4394 | # n1__796
%"alpha_0__796"<FLOAT,?> ⬅️ ::CastLike(%"alpha__796", %"_val_7__791")
4395 | # n2__796
%"other_1__796"<FLOAT,?> ⬅️ ::Mul(%"_val_7__791", %"alpha_0__796")
4396 | # n3__796
%"add_48__791"<FLOAT,?> ⬅️ ::Add(%"mean_16__791", %"other_1__796")
4397 | # n0__797
%"tmp__797"<FLOAT,?> ⬅️ ::Sqrt(%"add_48__791")
4398 | # n1__797
%"rsqrt_16__791"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__797")
4399 | # n0__798
%"mul_80__791"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_48__791", %"rsqrt_16__791")
4400 | # Cast_13__791
%"_to_copy_49__791"<FLOAT16,?> ⬅️ ::Cast(%"mul_80__791") {to=10}
4401 | # n0__799
%"model_layers_8_input_layernorm_1__790"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.8.input_layernorm.weight", %"_to_copy_49__791")
4402 | # n0__803
%"tmp__803"<INT64,[2]> ⬅️ ::Shape(%"model.layers.8.self_attn.q_proj.weight")
4403 | # n1__803
%"rank__802"<INT64,?> ⬅️ ::Size(%"tmp__803")
4404 | # n1__802
%"int64_2__802"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4405 | # n2__802
%"int64_2_cast__802"<INT64,?> ⬅️ ::CastLike(%"int64_2__802", %"rank__802")
4406 | # n3__802
%"cond__802"<BOOL,?> ⬅️ ::Equal(%"rank__802", %"int64_2_cast__802")
4407 | # n4__802
%"t_56__801"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__802") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__802"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__802_350
%"result__802"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.8.self_attn.q_proj.weight") {perm=[1, 0]}
return %"result__802"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__802"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__802_351
%"result_0__802"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.8.self_attn.q_proj.weight")
return %"result_0__802"<FLOAT16,[4096,4096]>
}}
4408 | # Constant_3__801
%"_val_3__801"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4409 | # n0__804
%"size_0__804"<INT64,[2]> ⬅️ ::Cast(%"_val_3__801") {to=7}
4410 | # n1__804
%"view_161__801"<FLOAT16,[unk__1122,unk__1123]> ⬅️ ::Reshape(%"model_layers_8_input_layernorm_1__790", %"size_0__804")
4411 | # n0__805
%"mm_56__801"<FLOAT16,[unk__1122,4096]> ⬅️ ::MatMul(%"view_161__801", %"t_56__801")
4412 | # Constant_6__801
%"_val_6__801"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4413 | # n0__806
%"size_0__806"<INT64,[3]> ⬅️ ::Cast(%"_val_6__801") {to=7}
4414 | # n1__806
%"model_layers_8_self_attn_q_proj_1__800"<FLOAT16,[unk__1124,unk__1125,unk__1126]> ⬅️ ::Reshape(%"mm_56__801", %"size_0__806")
4415 | # n0__809
%"tmp__809"<INT64,[2]> ⬅️ ::Shape(%"model.layers.8.self_attn.k_proj.weight")
4416 | # n1__809
%"rank__808"<INT64,?> ⬅️ ::Size(%"tmp__809")
4417 | # n1__808
%"int64_2__808"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4418 | # n2__808
%"int64_2_cast__808"<INT64,?> ⬅️ ::CastLike(%"int64_2__808", %"rank__808")
4419 | # n3__808
%"cond__808"<BOOL,?> ⬅️ ::Equal(%"rank__808", %"int64_2_cast__808")
4420 | # n4__808
%"t_57__807"<FLOAT16,[unk__1127,unk__1128]> ⬅️ ::If(%"cond__808") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__808"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__808_352
%"result__808"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.8.self_attn.k_proj.weight") {perm=[1, 0]}
return %"result__808"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__808"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__808_353
%"result_0__808"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.8.self_attn.k_proj.weight")
return %"result_0__808"<FLOAT16,[1024,4096]>
}}
4421 | # Constant_3__807
%"_val_3__807"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4422 | # n0__810
%"size_0__810"<INT64,[2]> ⬅️ ::Cast(%"_val_3__807") {to=7}
4423 | # n1__810
%"view_163__807"<FLOAT16,[unk__1129,unk__1130]> ⬅️ ::Reshape(%"model_layers_8_input_layernorm_1__790", %"size_0__810")
4424 | # n0__811
%"mm_57__807"<FLOAT16,[unk__1129,unk__1128]> ⬅️ ::MatMul(%"view_163__807", %"t_57__807")
4425 | # Constant_6__807
%"_val_6__807"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4426 | # n0__812
%"size_0__812"<INT64,[3]> ⬅️ ::Cast(%"_val_6__807") {to=7}
4427 | # n1__812
%"model_layers_8_self_attn_k_proj_1__800"<FLOAT16,[unk__1131,unk__1132,unk__1133]> ⬅️ ::Reshape(%"mm_57__807", %"size_0__812")
4428 | # n0__815
%"tmp__815"<INT64,[2]> ⬅️ ::Shape(%"model.layers.8.self_attn.v_proj.weight")
4429 | # n1__815
%"rank__814"<INT64,?> ⬅️ ::Size(%"tmp__815")
4430 | # n1__814
%"int64_2__814"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4431 | # n2__814
%"int64_2_cast__814"<INT64,?> ⬅️ ::CastLike(%"int64_2__814", %"rank__814")
4432 | # n3__814
%"cond__814"<BOOL,?> ⬅️ ::Equal(%"rank__814", %"int64_2_cast__814")
4433 | # n4__814
%"t_58__813"<FLOAT16,[unk__1134,unk__1135]> ⬅️ ::If(%"cond__814") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__814"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__814_354
%"result__814"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.8.self_attn.v_proj.weight") {perm=[1, 0]}
return %"result__814"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__814"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__814_355
%"result_0__814"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.8.self_attn.v_proj.weight")
return %"result_0__814"<FLOAT16,[1024,4096]>
}}
4434 | # Constant_3__813
%"_val_3__813"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4435 | # n0__816
%"size_0__816"<INT64,[2]> ⬅️ ::Cast(%"_val_3__813") {to=7}
4436 | # n1__816
%"view_165__813"<FLOAT16,[unk__1136,unk__1137]> ⬅️ ::Reshape(%"model_layers_8_input_layernorm_1__790", %"size_0__816")
4437 | # n0__817
%"mm_58__813"<FLOAT16,[unk__1136,unk__1135]> ⬅️ ::MatMul(%"view_165__813", %"t_58__813")
4438 | # Constant_6__813
%"_val_6__813"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4439 | # n0__818
%"size_0__818"<INT64,[3]> ⬅️ ::Cast(%"_val_6__813") {to=7}
4440 | # n1__818
%"model_layers_8_self_attn_v_proj_1__800"<FLOAT16,[unk__1138,unk__1139,unk__1140]> ⬅️ ::Reshape(%"mm_58__813", %"size_0__818")
4441 | # Constant_61__800
%"_val_8__800"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
4442 | # n0__819
%"size_0__819"<INT64,[4]> ⬅️ ::Cast(%"_val_8__800") {to=7}
4443 | # n1__819
%"view_167__800"<FLOAT16,[unk__1141,unk__1142,unk__1143,unk__1144]> ⬅️ ::Reshape(%"model_layers_8_self_attn_q_proj_1__800", %"size_0__819")
4444 | # Transpose_63__800
%"transpose_32__800"<FLOAT16,[unk__1141,unk__1143,unk__1142,unk__1144]> ⬅️ ::Transpose(%"view_167__800") {perm=[0, 2, 1, 3]}
4445 | # Constant_64__800
%"_val_11__800"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
4446 | # n0__820
%"size_0__820"<INT64,[4]> ⬅️ ::Cast(%"_val_11__800") {to=7}
4447 | # n1__820
%"view_168__800"<FLOAT16,[unk__1145,unk__1146,unk__1147,unk__1148]> ⬅️ ::Reshape(%"model_layers_8_self_attn_k_proj_1__800", %"size_0__820")
4448 | # Transpose_66__800
%"transpose_33__800"<FLOAT16,[unk__1145,unk__1147,unk__1146,unk__1148]> ⬅️ ::Transpose(%"view_168__800") {perm=[0, 2, 1, 3]}
4449 | # Constant_67__800
%"_val_14__800"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
4450 | # n0__821
%"size_0__821"<INT64,[4]> ⬅️ ::Cast(%"_val_14__800") {to=7}
4451 | # n1__821
%"view_169__800"<FLOAT16,[unk__1149,unk__1150,unk__1151,unk__1152]> ⬅️ ::Reshape(%"model_layers_8_self_attn_v_proj_1__800", %"size_0__821")
4452 | # Transpose_69__800
%"model_1_16"<FLOAT16,[1,8,128,128]> ⬅️ ::Transpose(%"view_169__800") {perm=[0, 2, 1, 3]}
4453 | # Constant_8__822
%"_val_1__822"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4454 | # Cast_9__822
%"_val_2__822"<INT64,?> ⬅️ ::Cast(%"_val_1__822") {to=7}
4455 | # Constant_10__822
%"_val_3__822"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4456 | # Reshape_11__822
%"_val_4__822"<INT64,[1]> ⬅️ ::Reshape(%"_val_2__822", %"_val_3__822") {allowzero=0}
4457 | # Constant_12__822
%"_val_5__822"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4458 | # Cast_13__822
%"_val_6__822"<INT64,?> ⬅️ ::Cast(%"_val_5__822") {to=7}
4459 | # Constant_14__822
%"_val_7__822"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4460 | # Reshape_15__822
%"_val_8__822"<INT64,[1]> ⬅️ ::Reshape(%"_val_6__822", %"_val_7__822") {allowzero=0}
4461 | # Constant_16__822
%"_val_9__822"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4462 | # Cast_17__822
%"_val_10__822"<INT64,?> ⬅️ ::Cast(%"_val_9__822") {to=7}
4463 | # Constant_18__822
%"_val_11__822"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4464 | # Reshape_19__822
%"_val_12__822"<INT64,[1]> ⬅️ ::Reshape(%"_val_10__822", %"_val_11__822") {allowzero=0}
4465 | # Constant_20__822
%"_val_13__822"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4466 | # Cast_21__822
%"_val_14__822"<INT64,?> ⬅️ ::Cast(%"_val_13__822") {to=7}
4467 | # Constant_22__822
%"_val_15__822"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4468 | # Reshape_23__822
%"_val_16__822"<INT64,[1]> ⬅️ ::Reshape(%"_val_14__822", %"_val_15__822") {allowzero=0}
4469 | # Slice_24__822
%"model_layers_8_self_attn_rotary_emb_1__800"<FLOAT16,[unk__1153,unk__1154]> ⬅️ ::Slice(%"model.layers.8.self_attn.rotary_emb.cos_cached", %"_val_4__822", %"_val_8__822", %"_val_12__822", %"_val_16__822")
4470 | # Constant_25__822
%"_val_19__822"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4471 | # Cast_26__822
%"_val_20__822"<INT64,?> ⬅️ ::Cast(%"_val_19__822") {to=7}
4472 | # Constant_27__822
%"_val_21__822"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4473 | # Reshape_28__822
%"_val_22__822"<INT64,[1]> ⬅️ ::Reshape(%"_val_20__822", %"_val_21__822") {allowzero=0}
4474 | # Constant_29__822
%"_val_23__822"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4475 | # Cast_30__822
%"_val_24__822"<INT64,?> ⬅️ ::Cast(%"_val_23__822") {to=7}
4476 | # Constant_31__822
%"_val_25__822"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4477 | # Reshape_32__822
%"_val_26__822"<INT64,[1]> ⬅️ ::Reshape(%"_val_24__822", %"_val_25__822") {allowzero=0}
4478 | # Constant_33__822
%"_val_27__822"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4479 | # Cast_34__822
%"_val_28__822"<INT64,?> ⬅️ ::Cast(%"_val_27__822") {to=7}
4480 | # Constant_35__822
%"_val_29__822"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4481 | # Reshape_36__822
%"_val_30__822"<INT64,[1]> ⬅️ ::Reshape(%"_val_28__822", %"_val_29__822") {allowzero=0}
4482 | # Constant_37__822
%"_val_31__822"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4483 | # Cast_38__822
%"_val_32__822"<INT64,?> ⬅️ ::Cast(%"_val_31__822") {to=7}
4484 | # Constant_39__822
%"_val_33__822"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4485 | # Reshape_40__822
%"_val_34__822"<INT64,[1]> ⬅️ ::Reshape(%"_val_32__822", %"_val_33__822") {allowzero=0}
4486 | # Slice_41__822
%"model_layers_8_self_attn_rotary_emb_1_1__800"<FLOAT16,[unk__1155,unk__1156]> ⬅️ ::Slice(%"model.layers.8.self_attn.rotary_emb.sin_cached", %"_val_22__822", %"_val_26__822", %"_val_30__822", %"_val_34__822")
4487 | # Transpose_71__800
%"_val_21__800"<FLOAT16,[unk__1153,unk__1154]> ⬅️ ::Transpose(%"model_layers_8_self_attn_rotary_emb_1__800") {perm=[0, 1]}
4488 | # Max_72__800
%"_val_22__800"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
4489 | # Shape_73__800
%"_val_23__800"<INT64,[2]> ⬅️ ::Shape(%"_val_22__800") {start=0}
4490 | # Expand_74__800
%"_val_24__800"<INT64,[unk__1157,unk__1158]> ⬅️ ::Expand(%"view__1", %"_val_23__800")
4491 | # Constant_75__800
%"_val_25__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4492 | # Unsqueeze_76__800
%"_val_26__800"<INT64,[unk__1157,unk__1158,1]> ⬅️ ::Unsqueeze(%"_val_24__800", %"_val_25__800")
4493 | # Concat_77__800
%"_val_27__800"<INT64,[unk__1157,unk__1158,1]> ⬅️ ::Concat(%"_val_26__800") {axis=-1}
4494 | # GatherND_78__800
%"_val_28__800"<FLOAT16,[unk__1157,unk__1158,unk__1154]> ⬅️ ::GatherND(%"_val_21__800", %"_val_27__800") {batch_dims=0}
4495 | # Transpose_79__800
%"index_16__800"<FLOAT16,[unk__1157,unk__1158,unk__1154]> ⬅️ ::Transpose(%"_val_28__800") {perm=[0, 1, 2]}
4496 | # n0__823
%"dim__823"<INT64,?> ⬅️ ::Constant() {value_int=1}
4497 | # n1__823
%"dim_0__823"<INT64,?> ⬅️ ::Cast(%"dim__823") {to=7}
4498 | # n2__823
%"unsqueeze_33__800"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_16__800", %"dim_0__823")
4499 | # Transpose_81__800
%"_val_31__800"<FLOAT16,[unk__1155,unk__1156]> ⬅️ ::Transpose(%"model_layers_8_self_attn_rotary_emb_1_1__800") {perm=[0, 1]}
4500 | # Max_82__800
%"_val_32__800"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
4501 | # Shape_83__800
%"_val_33__800"<INT64,[2]> ⬅️ ::Shape(%"_val_32__800") {start=0}
4502 | # Expand_84__800
%"_val_34__800"<INT64,[unk__1159,unk__1160]> ⬅️ ::Expand(%"view__1", %"_val_33__800")
4503 | # Constant_85__800
%"_val_35__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4504 | # Unsqueeze_86__800
%"_val_36__800"<INT64,[unk__1159,unk__1160,1]> ⬅️ ::Unsqueeze(%"_val_34__800", %"_val_35__800")
4505 | # Concat_87__800
%"_val_37__800"<INT64,[unk__1159,unk__1160,1]> ⬅️ ::Concat(%"_val_36__800") {axis=-1}
4506 | # GatherND_88__800
%"_val_38__800"<FLOAT16,[unk__1159,unk__1160,unk__1156]> ⬅️ ::GatherND(%"_val_31__800", %"_val_37__800") {batch_dims=0}
4507 | # Transpose_89__800
%"index_17__800"<FLOAT16,[unk__1159,unk__1160,unk__1156]> ⬅️ ::Transpose(%"_val_38__800") {perm=[0, 1, 2]}
4508 | # n0__824
%"dim__824"<INT64,?> ⬅️ ::Constant() {value_int=1}
4509 | # n1__824
%"dim_0__824"<INT64,?> ⬅️ ::Cast(%"dim__824") {to=7}
4510 | # n2__824
%"unsqueeze_34__800"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_17__800", %"dim_0__824")
4511 | # n0__825
%"mul_82__800"<FLOAT16,?> ⬅️ ::Mul(%"transpose_32__800", %"unsqueeze_33__800")
4512 | # Constant_92__800
%"_val_42__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4513 | # Cast_93__800
%"_val_43__800"<INT64,?> ⬅️ ::Cast(%"_val_42__800") {to=7}
4514 | # Constant_94__800
%"_val_44__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4515 | # Reshape_95__800
%"_val_45__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_43__800", %"_val_44__800") {allowzero=0}
4516 | # Constant_96__800
%"_val_46__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4517 | # Cast_97__800
%"_val_47__800"<INT64,?> ⬅️ ::Cast(%"_val_46__800") {to=7}
4518 | # Constant_98__800
%"_val_48__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4519 | # Reshape_99__800
%"_val_49__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_47__800", %"_val_48__800") {allowzero=0}
4520 | # Constant_100__800
%"_val_50__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4521 | # Cast_101__800
%"_val_51__800"<INT64,?> ⬅️ ::Cast(%"_val_50__800") {to=7}
4522 | # Constant_102__800
%"_val_52__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4523 | # Reshape_103__800
%"_val_53__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_51__800", %"_val_52__800") {allowzero=0}
4524 | # Constant_104__800
%"_val_54__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4525 | # Cast_105__800
%"_val_55__800"<INT64,?> ⬅️ ::Cast(%"_val_54__800") {to=7}
4526 | # Constant_106__800
%"_val_56__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4527 | # Reshape_107__800
%"_val_57__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_55__800", %"_val_56__800") {allowzero=0}
4528 | # Slice_108__800
%"slice_115__800"<FLOAT16,[unk__1161,unk__1162,unk__1163,unk__1164]> ⬅️ ::Slice(%"transpose_32__800", %"_val_45__800", %"_val_49__800", %"_val_53__800", %"_val_57__800")
4529 | # Constant_109__800
%"_val_59__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4530 | # Cast_110__800
%"_val_60__800"<INT64,?> ⬅️ ::Cast(%"_val_59__800") {to=7}
4531 | # Constant_111__800
%"_val_61__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4532 | # Reshape_112__800
%"_val_62__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_60__800", %"_val_61__800") {allowzero=0}
4533 | # Constant_113__800
%"_val_63__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4534 | # Cast_114__800
%"_val_64__800"<INT64,?> ⬅️ ::Cast(%"_val_63__800") {to=7}
4535 | # Constant_115__800
%"_val_65__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4536 | # Reshape_116__800
%"_val_66__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_64__800", %"_val_65__800") {allowzero=0}
4537 | # Constant_117__800
%"_val_67__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4538 | # Cast_118__800
%"_val_68__800"<INT64,?> ⬅️ ::Cast(%"_val_67__800") {to=7}
4539 | # Constant_119__800
%"_val_69__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4540 | # Reshape_120__800
%"_val_70__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_68__800", %"_val_69__800") {allowzero=0}
4541 | # Constant_121__800
%"_val_71__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4542 | # Cast_122__800
%"_val_72__800"<INT64,?> ⬅️ ::Cast(%"_val_71__800") {to=7}
4543 | # Constant_123__800
%"_val_73__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4544 | # Reshape_124__800
%"_val_74__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_72__800", %"_val_73__800") {allowzero=0}
4545 | # Slice_125__800
%"slice_116__800"<FLOAT16,[unk__1165,unk__1166,unk__1167,unk__1168]> ⬅️ ::Slice(%"transpose_32__800", %"_val_62__800", %"_val_66__800", %"_val_70__800", %"_val_74__800")
4546 | # n0__826
%"neg_16__800"<FLOAT16,[unk__1165,unk__1166,unk__1167,unk__1168]> ⬅️ ::Neg(%"slice_116__800")
4547 | # SequenceConstruct_127__800
%"77__800"<Sequence(Tensor(FLOAT16)),[unk__1169,unk__1170,unk__1171,unk__1172]> ⬅️ ::SequenceConstruct(%"neg_16__800", %"slice_115__800")
4548 | # n0__827
%"cat_16__800"<FLOAT16,[unk__1169,unk__1170,unk__1171,unk__1173]> ⬅️ ::ConcatFromSequence(%"77__800") {axis=-1}
4549 | # n0__828
%"mul_83__800"<FLOAT16,?> ⬅️ ::Mul(%"cat_16__800", %"unsqueeze_34__800")
4550 | # n0__829
%"alpha__829"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
4551 | # n1__829
%"alpha_0__829"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__829", %"mul_83__800")
4552 | # n2__829
%"other_1__829"<FLOAT16,?> ⬅️ ::Mul(%"mul_83__800", %"alpha_0__829")
4553 | # n3__829
%"add_49__800"<FLOAT16,?> ⬅️ ::Add(%"mul_82__800", %"other_1__829")
4554 | # n0__830
%"mul_84__800"<FLOAT16,?> ⬅️ ::Mul(%"transpose_33__800", %"unsqueeze_33__800")
4555 | # Constant_132__800
%"_val_82__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4556 | # Cast_133__800
%"_val_83__800"<INT64,?> ⬅️ ::Cast(%"_val_82__800") {to=7}
4557 | # Constant_134__800
%"_val_84__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4558 | # Reshape_135__800
%"_val_85__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_83__800", %"_val_84__800") {allowzero=0}
4559 | # Constant_136__800
%"_val_86__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4560 | # Cast_137__800
%"_val_87__800"<INT64,?> ⬅️ ::Cast(%"_val_86__800") {to=7}
4561 | # Constant_138__800
%"_val_88__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4562 | # Reshape_139__800
%"_val_89__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_87__800", %"_val_88__800") {allowzero=0}
4563 | # Constant_140__800
%"_val_90__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4564 | # Cast_141__800
%"_val_91__800"<INT64,?> ⬅️ ::Cast(%"_val_90__800") {to=7}
4565 | # Constant_142__800
%"_val_92__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4566 | # Reshape_143__800
%"_val_93__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_91__800", %"_val_92__800") {allowzero=0}
4567 | # Constant_144__800
%"_val_94__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4568 | # Cast_145__800
%"_val_95__800"<INT64,?> ⬅️ ::Cast(%"_val_94__800") {to=7}
4569 | # Constant_146__800
%"_val_96__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4570 | # Reshape_147__800
%"_val_97__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_95__800", %"_val_96__800") {allowzero=0}
4571 | # Slice_148__800
%"slice_117__800"<FLOAT16,[unk__1174,unk__1175,unk__1176,unk__1177]> ⬅️ ::Slice(%"transpose_33__800", %"_val_85__800", %"_val_89__800", %"_val_93__800", %"_val_97__800")
4572 | # Constant_149__800
%"_val_99__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4573 | # Cast_150__800
%"_val_100__800"<INT64,?> ⬅️ ::Cast(%"_val_99__800") {to=7}
4574 | # Constant_151__800
%"_val_101__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4575 | # Reshape_152__800
%"_val_102__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_100__800", %"_val_101__800") {allowzero=0}
4576 | # Constant_153__800
%"_val_103__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4577 | # Cast_154__800
%"_val_104__800"<INT64,?> ⬅️ ::Cast(%"_val_103__800") {to=7}
4578 | # Constant_155__800
%"_val_105__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4579 | # Reshape_156__800
%"_val_106__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_104__800", %"_val_105__800") {allowzero=0}
4580 | # Constant_157__800
%"_val_107__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4581 | # Cast_158__800
%"_val_108__800"<INT64,?> ⬅️ ::Cast(%"_val_107__800") {to=7}
4582 | # Constant_159__800
%"_val_109__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4583 | # Reshape_160__800
%"_val_110__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_108__800", %"_val_109__800") {allowzero=0}
4584 | # Constant_161__800
%"_val_111__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4585 | # Cast_162__800
%"_val_112__800"<INT64,?> ⬅️ ::Cast(%"_val_111__800") {to=7}
4586 | # Constant_163__800
%"_val_113__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4587 | # Reshape_164__800
%"_val_114__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_112__800", %"_val_113__800") {allowzero=0}
4588 | # Slice_165__800
%"slice_118__800"<FLOAT16,[unk__1178,unk__1179,unk__1180,unk__1181]> ⬅️ ::Slice(%"transpose_33__800", %"_val_102__800", %"_val_106__800", %"_val_110__800", %"_val_114__800")
4589 | # n0__831
%"neg_17__800"<FLOAT16,[unk__1178,unk__1179,unk__1180,unk__1181]> ⬅️ ::Neg(%"slice_118__800")
4590 | # SequenceConstruct_167__800
%"117__800"<Sequence(Tensor(FLOAT16)),[unk__1182,unk__1183,unk__1184,unk__1185]> ⬅️ ::SequenceConstruct(%"neg_17__800", %"slice_117__800")
4591 | # n0__832
%"cat_17__800"<FLOAT16,[unk__1182,unk__1183,unk__1184,unk__1186]> ⬅️ ::ConcatFromSequence(%"117__800") {axis=-1}
4592 | # n0__833
%"mul_85__800"<FLOAT16,?> ⬅️ ::Mul(%"cat_17__800", %"unsqueeze_34__800")
4593 | # n0__834
%"alpha__834"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
4594 | # n1__834
%"alpha_0__834"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__834", %"mul_85__800")
4595 | # n2__834
%"other_1__834"<FLOAT16,?> ⬅️ ::Mul(%"mul_85__800", %"alpha_0__834")
4596 | # n3__834
%"model_1_17"<FLOAT16,[1,8,128,128]> ⬅️ ::Add(%"mul_84__800", %"other_1__834")
4597 | # Constant_171__800
%"_val_121__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4598 | # Cast_172__800
%"_val_122__800"<INT64,?> ⬅️ ::Cast(%"_val_121__800") {to=7}
4599 | # Constant_173__800
%"_val_123__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4600 | # Reshape_174__800
%"_val_124__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_122__800", %"_val_123__800") {allowzero=0}
4601 | # Constant_175__800
%"_val_125__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4602 | # Cast_176__800
%"_val_126__800"<INT64,?> ⬅️ ::Cast(%"_val_125__800") {to=7}
4603 | # Constant_177__800
%"_val_127__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4604 | # Reshape_178__800
%"_val_128__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_126__800", %"_val_127__800") {allowzero=0}
4605 | # Constant_179__800
%"_val_129__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4606 | # Cast_180__800
%"_val_130__800"<INT64,?> ⬅️ ::Cast(%"_val_129__800") {to=7}
4607 | # Constant_181__800
%"_val_131__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4608 | # Reshape_182__800
%"_val_132__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_130__800", %"_val_131__800") {allowzero=0}
4609 | # Constant_183__800
%"_val_133__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4610 | # Cast_184__800
%"_val_134__800"<INT64,?> ⬅️ ::Cast(%"_val_133__800") {to=7}
4611 | # Constant_185__800
%"_val_135__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4612 | # Reshape_186__800
%"_val_136__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_134__800", %"_val_135__800") {allowzero=0}
4613 | # Slice_187__800
%"slice_119__800"<FLOAT16,[unk__1187,unk__1188,unk__1189,unk__1190]> ⬅️ ::Slice(%"model_1_17", %"_val_124__800", %"_val_128__800", %"_val_132__800", %"_val_136__800")
4614 | # Constant_188__800
%"_val_138__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4615 | # Cast_189__800
%"_val_139__800"<INT64,?> ⬅️ ::Cast(%"_val_138__800") {to=7}
4616 | # Constant_190__800
%"_val_140__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4617 | # Reshape_191__800
%"_val_141__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_139__800", %"_val_140__800") {allowzero=0}
4618 | # Constant_192__800
%"_val_142__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4619 | # Cast_193__800
%"_val_143__800"<INT64,?> ⬅️ ::Cast(%"_val_142__800") {to=7}
4620 | # Constant_194__800
%"_val_144__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4621 | # Reshape_195__800
%"_val_145__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_143__800", %"_val_144__800") {allowzero=0}
4622 | # Constant_196__800
%"_val_146__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4623 | # Cast_197__800
%"_val_147__800"<INT64,?> ⬅️ ::Cast(%"_val_146__800") {to=7}
4624 | # Constant_198__800
%"_val_148__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4625 | # Reshape_199__800
%"_val_149__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_147__800", %"_val_148__800") {allowzero=0}
4626 | # Constant_200__800
%"_val_150__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4627 | # Cast_201__800
%"_val_151__800"<INT64,?> ⬅️ ::Cast(%"_val_150__800") {to=7}
4628 | # Constant_202__800
%"_val_152__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4629 | # Reshape_203__800
%"_val_153__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_151__800", %"_val_152__800") {allowzero=0}
4630 | # Slice_204__800
%"slice_120__800"<FLOAT16,[unk__1191,unk__1192,unk__1193,unk__1194]> ⬅️ ::Slice(%"slice_119__800", %"_val_141__800", %"_val_145__800", %"_val_149__800", %"_val_153__800")
4631 | # n0__835
%"dim__835"<INT64,?> ⬅️ ::Constant() {value_int=2}
4632 | # n1__835
%"dim_0__835"<INT64,?> ⬅️ ::Cast(%"dim__835") {to=7}
4633 | # n2__835
%"unsqueeze_35__800"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_120__800", %"dim_0__835")
4634 | # Constant_206__800
%"_val_156__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4635 | # Cast_207__800
%"_val_157__800"<INT64,?> ⬅️ ::Cast(%"_val_156__800") {to=7}
4636 | # Constant_208__800
%"_val_158__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4637 | # Reshape_209__800
%"_val_159__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_157__800", %"_val_158__800") {allowzero=0}
4638 | # Constant_210__800
%"_val_160__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4639 | # Cast_211__800
%"_val_161__800"<INT64,?> ⬅️ ::Cast(%"_val_160__800") {to=7}
4640 | # Constant_212__800
%"_val_162__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4641 | # Reshape_213__800
%"_val_163__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_161__800", %"_val_162__800") {allowzero=0}
4642 | # Constant_214__800
%"_val_164__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4643 | # Cast_215__800
%"_val_165__800"<INT64,?> ⬅️ ::Cast(%"_val_164__800") {to=7}
4644 | # Constant_216__800
%"_val_166__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4645 | # Reshape_217__800
%"_val_167__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_165__800", %"_val_166__800") {allowzero=0}
4646 | # Constant_218__800
%"_val_168__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4647 | # Cast_219__800
%"_val_169__800"<INT64,?> ⬅️ ::Cast(%"_val_168__800") {to=7}
4648 | # Constant_220__800
%"_val_170__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4649 | # Reshape_221__800
%"_val_171__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_169__800", %"_val_170__800") {allowzero=0}
4650 | # Slice_222__800
%"slice_121__800"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_35__800", %"_val_159__800", %"_val_163__800", %"_val_167__800", %"_val_171__800")
4651 | # Constant_223__800
%"_val_173__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4652 | # Cast_224__800
%"_val_174__800"<INT64,?> ⬅️ ::Cast(%"_val_173__800") {to=7}
4653 | # Constant_225__800
%"_val_175__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4654 | # Reshape_226__800
%"_val_176__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_174__800", %"_val_175__800") {allowzero=0}
4655 | # Constant_227__800
%"_val_177__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4656 | # Cast_228__800
%"_val_178__800"<INT64,?> ⬅️ ::Cast(%"_val_177__800") {to=7}
4657 | # Constant_229__800
%"_val_179__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4658 | # Reshape_230__800
%"_val_180__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_178__800", %"_val_179__800") {allowzero=0}
4659 | # Constant_231__800
%"_val_181__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4660 | # Cast_232__800
%"_val_182__800"<INT64,?> ⬅️ ::Cast(%"_val_181__800") {to=7}
4661 | # Constant_233__800
%"_val_183__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4662 | # Reshape_234__800
%"_val_184__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_182__800", %"_val_183__800") {allowzero=0}
4663 | # Constant_235__800
%"_val_185__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4664 | # Cast_236__800
%"_val_186__800"<INT64,?> ⬅️ ::Cast(%"_val_185__800") {to=7}
4665 | # Constant_237__800
%"_val_187__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4666 | # Reshape_238__800
%"_val_188__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_186__800", %"_val_187__800") {allowzero=0}
4667 | # Slice_239__800
%"slice_122__800"<FLOAT16,?> ⬅️ ::Slice(%"slice_121__800", %"_val_176__800", %"_val_180__800", %"_val_184__800", %"_val_188__800")
4668 | # Constant_240__800
%"_val_190__800"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
4669 | # n0__836
%"size_0__836"<INT64,[5]> ⬅️ ::Cast(%"_val_190__800") {to=7}
4670 | # n1__836
%"size_1__836"<INT64,[5]> ⬅️ ::Abs(%"size_0__836")
4671 | # n2__836
%"expand_16__800"<FLOAT16,?> ⬅️ ::Expand(%"slice_122__800", %"size_1__836")
4672 | # n0__837
%"clone_16__800"<FLOAT16,?> ⬅️ ::Identity(%"expand_16__800")
4673 | # Constant_243__800
%"_val_193__800"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
4674 | # n0__838
%"size_0__838"<INT64,[4]> ⬅️ ::Cast(%"_val_193__800") {to=7}
4675 | # n1__838
%"view_170__800"<FLOAT16,[unk__1195,unk__1196,unk__1197,unk__1198]> ⬅️ ::Reshape(%"clone_16__800", %"size_0__838")
4676 | # Constant_245__800
%"_val_195__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4677 | # Cast_246__800
%"_val_196__800"<INT64,?> ⬅️ ::Cast(%"_val_195__800") {to=7}
4678 | # Constant_247__800
%"_val_197__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4679 | # Reshape_248__800
%"_val_198__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_196__800", %"_val_197__800") {allowzero=0}
4680 | # Constant_249__800
%"_val_199__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4681 | # Cast_250__800
%"_val_200__800"<INT64,?> ⬅️ ::Cast(%"_val_199__800") {to=7}
4682 | # Constant_251__800
%"_val_201__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4683 | # Reshape_252__800
%"_val_202__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_200__800", %"_val_201__800") {allowzero=0}
4684 | # Constant_253__800
%"_val_203__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4685 | # Cast_254__800
%"_val_204__800"<INT64,?> ⬅️ ::Cast(%"_val_203__800") {to=7}
4686 | # Constant_255__800
%"_val_205__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4687 | # Reshape_256__800
%"_val_206__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_204__800", %"_val_205__800") {allowzero=0}
4688 | # Constant_257__800
%"_val_207__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4689 | # Cast_258__800
%"_val_208__800"<INT64,?> ⬅️ ::Cast(%"_val_207__800") {to=7}
4690 | # Constant_259__800
%"_val_209__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4691 | # Reshape_260__800
%"_val_210__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_208__800", %"_val_209__800") {allowzero=0}
4692 | # Slice_261__800
%"slice_123__800"<FLOAT16,[unk__1199,unk__1200,unk__1201,unk__1202]> ⬅️ ::Slice(%"model_1_16", %"_val_198__800", %"_val_202__800", %"_val_206__800", %"_val_210__800")
4693 | # Constant_262__800
%"_val_212__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4694 | # Cast_263__800
%"_val_213__800"<INT64,?> ⬅️ ::Cast(%"_val_212__800") {to=7}
4695 | # Constant_264__800
%"_val_214__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4696 | # Reshape_265__800
%"_val_215__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_213__800", %"_val_214__800") {allowzero=0}
4697 | # Constant_266__800
%"_val_216__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4698 | # Cast_267__800
%"_val_217__800"<INT64,?> ⬅️ ::Cast(%"_val_216__800") {to=7}
4699 | # Constant_268__800
%"_val_218__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4700 | # Reshape_269__800
%"_val_219__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_217__800", %"_val_218__800") {allowzero=0}
4701 | # Constant_270__800
%"_val_220__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4702 | # Cast_271__800
%"_val_221__800"<INT64,?> ⬅️ ::Cast(%"_val_220__800") {to=7}
4703 | # Constant_272__800
%"_val_222__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4704 | # Reshape_273__800
%"_val_223__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_221__800", %"_val_222__800") {allowzero=0}
4705 | # Constant_274__800
%"_val_224__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4706 | # Cast_275__800
%"_val_225__800"<INT64,?> ⬅️ ::Cast(%"_val_224__800") {to=7}
4707 | # Constant_276__800
%"_val_226__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4708 | # Reshape_277__800
%"_val_227__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_225__800", %"_val_226__800") {allowzero=0}
4709 | # Slice_278__800
%"slice_124__800"<FLOAT16,[unk__1203,unk__1204,unk__1205,unk__1206]> ⬅️ ::Slice(%"slice_123__800", %"_val_215__800", %"_val_219__800", %"_val_223__800", %"_val_227__800")
4710 | # n0__839
%"dim__839"<INT64,?> ⬅️ ::Constant() {value_int=2}
4711 | # n1__839
%"dim_0__839"<INT64,?> ⬅️ ::Cast(%"dim__839") {to=7}
4712 | # n2__839
%"unsqueeze_36__800"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_124__800", %"dim_0__839")
4713 | # Constant_280__800
%"_val_230__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4714 | # Cast_281__800
%"_val_231__800"<INT64,?> ⬅️ ::Cast(%"_val_230__800") {to=7}
4715 | # Constant_282__800
%"_val_232__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4716 | # Reshape_283__800
%"_val_233__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_231__800", %"_val_232__800") {allowzero=0}
4717 | # Constant_284__800
%"_val_234__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4718 | # Cast_285__800
%"_val_235__800"<INT64,?> ⬅️ ::Cast(%"_val_234__800") {to=7}
4719 | # Constant_286__800
%"_val_236__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4720 | # Reshape_287__800
%"_val_237__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_235__800", %"_val_236__800") {allowzero=0}
4721 | # Constant_288__800
%"_val_238__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4722 | # Cast_289__800
%"_val_239__800"<INT64,?> ⬅️ ::Cast(%"_val_238__800") {to=7}
4723 | # Constant_290__800
%"_val_240__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4724 | # Reshape_291__800
%"_val_241__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_239__800", %"_val_240__800") {allowzero=0}
4725 | # Constant_292__800
%"_val_242__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4726 | # Cast_293__800
%"_val_243__800"<INT64,?> ⬅️ ::Cast(%"_val_242__800") {to=7}
4727 | # Constant_294__800
%"_val_244__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4728 | # Reshape_295__800
%"_val_245__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_243__800", %"_val_244__800") {allowzero=0}
4729 | # Slice_296__800
%"slice_125__800"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_36__800", %"_val_233__800", %"_val_237__800", %"_val_241__800", %"_val_245__800")
4730 | # Constant_297__800
%"_val_247__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4731 | # Cast_298__800
%"_val_248__800"<INT64,?> ⬅️ ::Cast(%"_val_247__800") {to=7}
4732 | # Constant_299__800
%"_val_249__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4733 | # Reshape_300__800
%"_val_250__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_248__800", %"_val_249__800") {allowzero=0}
4734 | # Constant_301__800
%"_val_251__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4735 | # Cast_302__800
%"_val_252__800"<INT64,?> ⬅️ ::Cast(%"_val_251__800") {to=7}
4736 | # Constant_303__800
%"_val_253__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4737 | # Reshape_304__800
%"_val_254__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_252__800", %"_val_253__800") {allowzero=0}
4738 | # Constant_305__800
%"_val_255__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4739 | # Cast_306__800
%"_val_256__800"<INT64,?> ⬅️ ::Cast(%"_val_255__800") {to=7}
4740 | # Constant_307__800
%"_val_257__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4741 | # Reshape_308__800
%"_val_258__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_256__800", %"_val_257__800") {allowzero=0}
4742 | # Constant_309__800
%"_val_259__800"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4743 | # Cast_310__800
%"_val_260__800"<INT64,?> ⬅️ ::Cast(%"_val_259__800") {to=7}
4744 | # Constant_311__800
%"_val_261__800"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4745 | # Reshape_312__800
%"_val_262__800"<INT64,[1]> ⬅️ ::Reshape(%"_val_260__800", %"_val_261__800") {allowzero=0}
4746 | # Slice_313__800
%"slice_126__800"<FLOAT16,?> ⬅️ ::Slice(%"slice_125__800", %"_val_250__800", %"_val_254__800", %"_val_258__800", %"_val_262__800")
4747 | # Constant_314__800
%"_val_264__800"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
4748 | # n0__840
%"size_0__840"<INT64,[5]> ⬅️ ::Cast(%"_val_264__800") {to=7}
4749 | # n1__840
%"size_1__840"<INT64,[5]> ⬅️ ::Abs(%"size_0__840")
4750 | # n2__840
%"expand_17__800"<FLOAT16,?> ⬅️ ::Expand(%"slice_126__800", %"size_1__840")
4751 | # n0__841
%"clone_17__800"<FLOAT16,?> ⬅️ ::Identity(%"expand_17__800")
4752 | # Constant_317__800
%"_val_267__800"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
4753 | # n0__842
%"size_0__842"<INT64,[4]> ⬅️ ::Cast(%"_val_267__800") {to=7}
4754 | # n1__842
%"view_171__800"<FLOAT16,[unk__1207,unk__1208,unk__1209,unk__1210]> ⬅️ ::Reshape(%"clone_17__800", %"size_0__842")
4755 | # n0__843
%"tmp__843"<INT64,[unk__1211]> ⬅️ ::Shape(%"add_49__800")
4756 | # n1__843
%"int64_m1__843"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_m1')}
4757 | # n2__843
%"tmp_subscripted__843"<INT64,?> ⬅️ ::Gather(%"tmp__843", %"int64_m1__843") {axis=0}
4758 | # n3__843
%"embedding_size__843"<FLOAT16,?> ⬅️ ::CastLike(%"tmp_subscripted__843", %"add_49__800")
4759 | # n4__843
%"const__843"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
4760 | # n5__843
%"tmp_0__843"<FLOAT16,?> ⬅️ ::Sqrt(%"embedding_size__843")
4761 | # n6__843
%"const_cast__843"<FLOAT16,?> ⬅️ ::CastLike(%"const__843", %"tmp_0__843")
4762 | # n7__843
%"_val_269__800"<FLOAT16,?> ⬅️ ::Div(%"const_cast__843", %"tmp_0__843")
4763 | # CastLike_320__800
%"_val_270__800"<FLOAT16,?> ⬅️ ::CastLike(%"_val_269__800", %"add_49__800")
4764 | # n0__844
%"tmp__844"<INT64,[unk__1212]> ⬅️ ::Shape(%"add_49__800")
4765 | # n1__844
%"int64_0_1d__844"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
4766 | # n2__844
%"int64_1_1d__844"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
4767 | # n3__844
%"int64_m2_1d__844"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
4768 | # n4__844
%"int64_m1_1d__844"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
4769 | # n5__844
%"target_length__844"<INT64,[unk__1213]> ⬅️ ::Slice(%"tmp__844", %"int64_m2_1d__844", %"int64_m1_1d__844", %"int64_0_1d__844", %"int64_1_1d__844")
4770 | # n6__844
%"tmp_0__844"<INT64,[4]> ⬅️ ::Shape(%"view_170__800")
4771 | # n7__844
%"int64_0_1d_1__844"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
4772 | # n8__844
%"int64_1_1d_2__844"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
4773 | # n9__844
%"int64_m2_1d_3__844"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_3')}
4774 | # n10__844
%"int64_m1_1d_4__844"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_4')}
4775 | # n11__844
%"source_length__844"<INT64,[1]> ⬅️ ::Slice(%"tmp_0__844", %"int64_m2_1d_3__844", %"int64_m1_1d_4__844", %"int64_0_1d_1__844", %"int64_1_1d_2__844")
4776 | # n12__844
%"size__844"<INT64,[unk__1214]> ⬅️ ::Concat(%"target_length__844", %"source_length__844") {axis=0}
4777 | # n13__844
%"const__844"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
4778 | # n14__844
%"attn_mask__844"<FLOAT,?> ⬅️ ::Expand(%"const__844", %"size__844")
4779 | # n15__844
%"attn_mask_5__844"<FLOAT,?> ⬅️ ::Trilu(%"attn_mask__844") {upper=0}
4780 | # n16__844
%"const_6__844"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_6')}
4781 | # n17__844
%"const_6_cast__844"<FLOAT,?> ⬅️ ::CastLike(%"const_6__844", %"attn_mask_5__844")
4782 | # n18__844
%"tmp_7__844"<BOOL,?> ⬅️ ::Equal(%"attn_mask_5__844", %"const_6_cast__844")
4783 | # n19__844
%"tmp_8__844"<FLOAT,?> ⬅️ ::Constant() {value_float=-inf}
4784 | # n20__844
%"const_9__844"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
4785 | # n21__844
%"const_9_cast__844"<FLOAT,?> ⬅️ ::CastLike(%"const_9__844", %"tmp_8__844")
4786 | # n22__844
%"attn_mask_10__844"<FLOAT,?> ⬅️ ::Where(%"tmp_7__844", %"tmp_8__844", %"const_9_cast__844")
4787 | # n23__844
%"_val_271__800"<FLOAT16,?> ⬅️ ::CastLike(%"attn_mask_10__844", %"add_49__800")
4788 | # n0__845
%"key_shape__845"<INT64,[4]> ⬅️ ::Shape(%"view_170__800")
4789 | # n1__845
%"int64_0_1d__845"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
4790 | # n2__845
%"int64_1_1d__845"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
4791 | # n3__845
%"int64_m1_1d__845"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
4792 | # n4__845
%"int64_9223372036854775807_1d__845"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_9223372036854775807_1d')}
4793 | # n5__845
%"key_last_dim__845"<INT64,[1]> ⬅️ ::Slice(%"key_shape__845", %"int64_m1_1d__845", %"int64_9223372036854775807_1d__845", %"int64_0_1d__845", %"int64_1_1d__845")
4794 | # n6__845
%"int64_0_1d_0__845"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_0')}
4795 | # n7__845
%"int64_1_1d_1__845"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_1')}
4796 | # n8__845
%"int64_m2_1d__845"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
4797 | # n9__845
%"int64_m1_1d_2__845"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_2')}
4798 | # n10__845
%"key_second_last_dim__845"<INT64,[1]> ⬅️ ::Slice(%"key_shape__845", %"int64_m2_1d__845", %"int64_m1_1d_2__845", %"int64_0_1d_0__845", %"int64_1_1d_1__845")
4799 | # n11__845
%"int64_0_1d_3__845"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
4800 | # n12__845
%"int64_1_1d_4__845"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
4801 | # n13__845
%"int64_m2_1d_5__845"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_5')}
4802 | # n14__845
%"key_first_dims__845"<INT64,[2]> ⬅️ ::Slice(%"key_shape__845", %"int64_0_1d_3__845", %"int64_m2_1d_5__845", %"int64_0_1d_3__845", %"int64_1_1d_4__845")
4803 | # n15__845
%"tmp__845"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
4804 | # n16__845
%"key_squeezed_shape__845"<INT64,[3]> ⬅️ ::Concat(%"tmp__845", %"key_second_last_dim__845", %"key_last_dim__845") {axis=0}
4805 | # n17__845
%"key_squeezed__845"<FLOAT16,[unk__1215,unk__1216,unk__1217]> ⬅️ ::Reshape(%"view_170__800", %"key_squeezed_shape__845")
4806 | # n18__845
%"key_squeezed_transposed__845"<FLOAT16,[unk__1215,unk__1217,unk__1216]> ⬅️ ::Transpose(%"key_squeezed__845") {perm=[0, 2, 1]}
4807 | # n19__845
%"key_transposed_shape__845"<INT64,[4]> ⬅️ ::Concat(%"key_first_dims__845", %"key_last_dim__845", %"key_second_last_dim__845") {axis=0}
4808 | # n20__845
%"key_transposed__845"<FLOAT16,[unk__1218,unk__1219,unk__1220,unk__1221]> ⬅️ ::Reshape(%"key_squeezed_transposed__845", %"key_transposed_shape__845")
4809 | # n21__845
%"tmp_6__845"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__800")
4810 | # n22__845
%"query_scaled__845"<FLOAT16,?> ⬅️ ::Mul(%"add_49__800", %"tmp_6__845")
4811 | # n23__845
%"tmp_7__845"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__800")
4812 | # n24__845
%"key_transposed_scaled__845"<FLOAT16,[unk__1218,unk__1219,unk__1220,unk__1221]> ⬅️ ::Mul(%"key_transposed__845", %"tmp_7__845")
4813 | # n25__845
%"tmp_8__845"<FLOAT16,?> ⬅️ ::MatMul(%"query_scaled__845", %"key_transposed_scaled__845")
4814 | # n26__845
%"tmp_9__845"<FLOAT16,?> ⬅️ ::Add(%"tmp_8__845", %"_val_271__800")
4815 | # n27__845
%"attn_weight__845"<FLOAT16,?> ⬅️ ::Softmax(%"tmp_9__845") {axis=-1}
4816 | # n28__845
%"dropout_p__845"<FLOAT,?> ⬅️ ::Constant() {value_float=0.0}
4817 | # n29__845
%"attn_weight_10__845"<FLOAT16,?>, %"___845"<BOOL,?> ⬅️ ::Dropout(%"attn_weight__845", %"dropout_p__845")
4818 | # n30__845
%"_scaled_dot_product_efficient_attention_8__800"<FLOAT16,?> ⬅️ ::MatMul(%"attn_weight_10__845", %"view_171__800")
4819 | # n0__846
%"query_0__846"<FLOAT16,?> ⬅️ ::Transpose(%"add_49__800") {perm=[0, 2, 1, 3]}
4820 | # n1__846
%"query_shape__846"<INT64,[unk__1222]> ⬅️ ::Shape(%"query_0__846")
4821 | # n2__846
%"int64_0_1d__846"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
4822 | # n3__846
%"int64_1_1d__846"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
4823 | # n4__846
%"query_first_dims__846"<INT64,[unk__1223]> ⬅️ ::Slice(%"query_shape__846", %"int64_0_1d__846", %"int64_1_1d__846", %"int64_0_1d__846", %"int64_1_1d__846")
4824 | # n5__846
%"int64_0_1d_1__846"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
4825 | # n6__846
%"int64_1_1d_2__846"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
4826 | # n7__846
%"int64_2_1d__846"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_2_1d')}
4827 | # n8__846
%"query_second_dims__846"<INT64,[unk__1224]> ⬅️ ::Slice(%"query_shape__846", %"int64_1_1d_2__846", %"int64_2_1d__846", %"int64_0_1d_1__846", %"int64_1_1d_2__846")
4828 | # n9__846
%"int64_0_1d_3__846"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
4829 | # n10__846
%"int64_1_1d_4__846"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
4830 | # n11__846
%"int64_m2_1d__846"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
4831 | # n12__846
%"int64_m1_1d__846"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
4832 | # n13__846
%"num_heads__846"<INT64,[unk__1225]> ⬅️ ::Slice(%"query_shape__846", %"int64_m2_1d__846", %"int64_m1_1d__846", %"int64_0_1d_3__846", %"int64_1_1d_4__846")
4833 | # n14__846
%"compute_log_sumexp__846"<INT64,?> ⬅️ ::Constant() {value_int=0}
4834 | # n15__846
%"compute_log_sumexp_as_bool__846"<BOOL,?> ⬅️ ::Cast(%"compute_log_sumexp__846") {to=9}
4835 | # n16__846
%"_scaled_dot_product_efficient_attention_8_1__800"<FLOAT,?> ⬅️ ::If(%"compute_log_sumexp_as_bool__846") {then_branch=
graph(
name=thenGraph_14,
inputs=(
),
outputs=(
%"logsum_exp__846"<FLOAT,?>
),
) {
0 | # n0__846_356
%"tmp__846"<FLOAT,[unk__1224]> ⬅️ ::Cast(%"query_second_dims__846") {to=1}
1 | # n1__846_357
%"const__846"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2 | # n2__846_358
%"const_cast__846"<FLOAT,?> ⬅️ ::CastLike(%"const__846", %"tmp__846")
3 | # n3__846_359
%"tmp_5__846"<FLOAT,[unk__1224]> ⬅️ ::Div(%"tmp__846", %"const_cast__846")
4 | # n4__846_360
%"tmp_6__846"<FLOAT,[unk__1224]> ⬅️ ::Ceil(%"tmp_5__846")
5 | # n5__846_361
%"const_7__846"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_7')}
6 | # n6__846_362
%"const_7_cast__846"<FLOAT,?> ⬅️ ::CastLike(%"const_7__846", %"tmp_6__846")
7 | # n7__846_363
%"tmp_8__846"<FLOAT,[unk__1224]> ⬅️ ::Mul(%"tmp_6__846", %"const_7_cast__846")
8 | # n8__846_364
%"logsumexp_dim__846"<INT64,[unk__1224]> ⬅️ ::Cast(%"tmp_8__846") {to=7}
9 | # n9__846_365
%"const_9__846"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
10 | # n10__846_366
%"tmp_10__846"<INT64,[unk__1226]> ⬅️ ::Concat(%"query_first_dims__846", %"num_heads__846", %"logsumexp_dim__846") {axis=0}
11 | # n11__846_367
%"logsum_exp__846"<FLOAT,?> ⬅️ ::Expand(%"const_9__846", %"tmp_10__846")
return %"logsum_exp__846"<FLOAT,?>
}, else_branch=
graph(
name=elseGraph_14,
inputs=(
),
outputs=(
%"logsum_exp_14__846"<FLOAT,?>
),
) {
0 | # n0__846_368
%"const_11__846"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_11')}
1 | # n1__846_369
%"int64_0_1d_12__846"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_12')}
2 | # n2__846_370
%"int64_0_1d_12_cast__846"<INT64,[1]> ⬅️ ::CastLike(%"int64_0_1d_12__846", %"num_heads__846")
3 | # n3__846_371
%"tmp_13__846"<INT64,[unk__1227]> ⬅️ ::Concat(%"query_first_dims__846", %"num_heads__846", %"int64_0_1d_12_cast__846") {axis=0}
4 | # n4__846_372
%"logsum_exp_14__846"<FLOAT,?> ⬅️ ::Expand(%"const_11__846", %"tmp_13__846")
return %"logsum_exp_14__846"<FLOAT,?>
}}
4836 | # n17__846
%"tmp_16__846"<INT64,[0]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[0]>('Empty_INTS')}
4837 | # n18__846
%"tmp_17__846"<FLOAT,?> ⬅️ ::ConstantOfShape(%"tmp_16__846")
4838 | # n19__846
%"_scaled_dot_product_efficient_attention_8_3__800"<INT64,?> ⬅️ ::Cast(%"tmp_17__846") {to=7}
4839 | # Transpose_324__800
%"transpose_35__800"<FLOAT16,?> ⬅️ ::Transpose(%"_scaled_dot_product_efficient_attention_8__800") {perm=[0, 2, 1, 3]}
4840 | # Constant_325__800
%"_val_276__800"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4841 | # n0__847
%"size_0__847"<INT64,[3]> ⬅️ ::Cast(%"_val_276__800") {to=7}
4842 | # n1__847
%"view_172__800"<FLOAT16,[unk__1228,unk__1229,unk__1230]> ⬅️ ::Reshape(%"transpose_35__800", %"size_0__847")
4843 | # n0__850
%"tmp__850"<INT64,[2]> ⬅️ ::Shape(%"model.layers.8.self_attn.o_proj.weight")
4844 | # n1__850
%"rank__849"<INT64,?> ⬅️ ::Size(%"tmp__850")
4845 | # n1__849
%"int64_2__849"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4846 | # n2__849
%"int64_2_cast__849"<INT64,?> ⬅️ ::CastLike(%"int64_2__849", %"rank__849")
4847 | # n3__849
%"cond__849"<BOOL,?> ⬅️ ::Equal(%"rank__849", %"int64_2_cast__849")
4848 | # n4__849
%"t_59__848"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__849") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__849"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__849_373
%"result__849"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.8.self_attn.o_proj.weight") {perm=[1, 0]}
return %"result__849"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__849"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__849_374
%"result_0__849"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.8.self_attn.o_proj.weight")
return %"result_0__849"<FLOAT16,[4096,4096]>
}}
4849 | # Constant_3__848
%"_val_3__848"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4850 | # n0__851
%"size_0__851"<INT64,[2]> ⬅️ ::Cast(%"_val_3__848") {to=7}
4851 | # n1__851
%"view_173__848"<FLOAT16,[unk__1231,unk__1232]> ⬅️ ::Reshape(%"view_172__800", %"size_0__851")
4852 | # n0__852
%"mm_59__848"<FLOAT16,[unk__1231,4096]> ⬅️ ::MatMul(%"view_173__848", %"t_59__848")
4853 | # Constant_6__848
%"_val_6__848"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4854 | # n0__853
%"size_0__853"<INT64,[3]> ⬅️ ::Cast(%"_val_6__848") {to=7}
4855 | # n1__853
%"model_layers_8_self_attn_1_2__790"<FLOAT16,[unk__1233,unk__1234,unk__1235]> ⬅️ ::Reshape(%"mm_59__848", %"size_0__853")
4856 | # n0__854
%"alpha__854"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
4857 | # n1__854
%"alpha_0__854"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__854", %"model_layers_8_self_attn_1_2__790")
4858 | # n2__854
%"other_1__854"<FLOAT16,[unk__1233,unk__1234,unk__1235]> ⬅️ ::Mul(%"model_layers_8_self_attn_1_2__790", %"alpha_0__854")
4859 | # n3__854
%"add_51__790"<FLOAT16,[unk__1236,128,4096]> ⬅️ ::Add(%"model_layers_7_1_2__1", %"other_1__854")
4860 | # Cast_3__855
%"_to_copy_50__855"<FLOAT,[unk__1236,128,4096]> ⬅️ ::Cast(%"add_51__790") {to=1}
4861 | # Constant_4__855
%"_val_2__855"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4862 | # Cast_5__855
%"scalar_tensor_default_17__855"<FLOAT,?> ⬅️ ::Cast(%"_val_2__855") {to=1}
4863 | # n0__856
%"pow_18__855"<FLOAT,[unk__1236,128,4096]> ⬅️ ::Pow(%"_to_copy_50__855", %"scalar_tensor_default_17__855")
4864 | # Constant_7__855
%"_val_5__855"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
4865 | # n0__858
%"tmp__858"<INT64,[3]> ⬅️ ::Shape(%"pow_18__855")
4866 | # n1__858
%"tmp_0__858"<INT64,?> ⬅️ ::Size(%"tmp__858")
4867 | # n2__858
%"tmp_1__858"<INT64,?> ⬅️ ::Constant() {value_int=0}
4868 | # n3__858
%"cond__857"<BOOL,?> ⬅️ ::Equal(%"tmp_0__858", %"tmp_1__858")
4869 | # n1__857
%"mean_17__855"<FLOAT,?> ⬅️ ::If(%"cond__857") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__857"<FLOAT,[unk__1236,128,4096]>
),
) {
0 | # n0__857_375
%"result__857"<FLOAT,[unk__1236,128,4096]> ⬅️ ::Identity(%"pow_18__855")
return %"result__857"<FLOAT,[unk__1236,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__857"<FLOAT,?>
),
) {
0 | # n0__859
%"tmp__859"<INT64,[1]> ⬅️ ::Shape(%"_val_5__855")
1 | # n1__859
%"tmp_0__859"<INT64,?> ⬅️ ::Size(%"tmp__859")
2 | # n2__859
%"tmp_1__859"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__859
%"cond_0__857"<BOOL,?> ⬅️ ::Equal(%"tmp_0__859", %"tmp_1__859")
4 | # n1__857_377
%"dim_3__857"<INT64,?> ⬅️ ::If(%"cond_0__857") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__857"<INT64,[1,1]>
),
) {
0 | # n0__857_378
%"int64_0__857"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__857_379
%"dim_1__857"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__855", %"int64_0__857")
return %"dim_1__857"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__857"<INT64,[1]>
),
) {
0 | # n0__857_380
%"dim_2__857"<INT64,[1]> ⬅️ ::Identity(%"_val_5__855")
return %"dim_2__857"<INT64,[1]>
}}
5 | # n2__857
%"result_4__857"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_18__855", %"dim_3__857") {keepdims=1}
return %"result_4__857"<FLOAT,?>
}}
4870 | # Constant_9__855
%"_val_7__855"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
4871 | # n0__860
%"alpha__860"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
4872 | # n1__860
%"alpha_0__860"<FLOAT,?> ⬅️ ::CastLike(%"alpha__860", %"_val_7__855")
4873 | # n2__860
%"other_1__860"<FLOAT,?> ⬅️ ::Mul(%"_val_7__855", %"alpha_0__860")
4874 | # n3__860
%"add_52__855"<FLOAT,?> ⬅️ ::Add(%"mean_17__855", %"other_1__860")
4875 | # n0__861
%"tmp__861"<FLOAT,?> ⬅️ ::Sqrt(%"add_52__855")
4876 | # n1__861
%"rsqrt_17__855"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__861")
4877 | # n0__862
%"mul_86__855"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_50__855", %"rsqrt_17__855")
4878 | # Cast_13__855
%"_to_copy_51__855"<FLOAT16,?> ⬅️ ::Cast(%"mul_86__855") {to=10}
4879 | # n0__863
%"model_layers_8_post_attention_layernorm_1__790"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.8.post_attention_layernorm.weight", %"_to_copy_51__855")
4880 | # n0__867
%"tmp__867"<INT64,[2]> ⬅️ ::Shape(%"model.layers.8.mlp.gate_proj.weight")
4881 | # n1__867
%"rank__866"<INT64,?> ⬅️ ::Size(%"tmp__867")
4882 | # n1__866
%"int64_2__866"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4883 | # n2__866
%"int64_2_cast__866"<INT64,?> ⬅️ ::CastLike(%"int64_2__866", %"rank__866")
4884 | # n3__866
%"cond__866"<BOOL,?> ⬅️ ::Equal(%"rank__866", %"int64_2_cast__866")
4885 | # n4__866
%"t_60__865"<FLOAT16,[unk__1237,unk__1238]> ⬅️ ::If(%"cond__866") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__866"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__866_381
%"result__866"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.8.mlp.gate_proj.weight") {perm=[1, 0]}
return %"result__866"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__866"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__866_382
%"result_0__866"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.8.mlp.gate_proj.weight")
return %"result_0__866"<FLOAT16,[14336,4096]>
}}
4886 | # Constant_3__865
%"_val_3__865"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4887 | # n0__868
%"size_0__868"<INT64,[2]> ⬅️ ::Cast(%"_val_3__865") {to=7}
4888 | # n1__868
%"view_175__865"<FLOAT16,[unk__1239,unk__1240]> ⬅️ ::Reshape(%"model_layers_8_post_attention_layernorm_1__790", %"size_0__868")
4889 | # n0__869
%"mm_60__865"<FLOAT16,[unk__1239,unk__1238]> ⬅️ ::MatMul(%"view_175__865", %"t_60__865")
4890 | # Constant_6__865
%"_val_6__865"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4891 | # n0__870
%"size_0__870"<INT64,[3]> ⬅️ ::Cast(%"_val_6__865") {to=7}
4892 | # n1__870
%"model_layers_8_mlp_gate_proj_1__864"<FLOAT16,[unk__1241,unk__1242,unk__1243]> ⬅️ ::Reshape(%"mm_60__865", %"size_0__870")
4893 | # Cast_0__871
%"_to_copy_52__871"<FLOAT,[unk__1241,unk__1242,unk__1243]> ⬅️ ::Cast(%"model_layers_8_mlp_gate_proj_1__864") {to=1}
4894 | # n0__872
%"sigmoid_8__871"<FLOAT,[unk__1241,unk__1242,unk__1243]> ⬅️ ::Sigmoid(%"_to_copy_52__871")
4895 | # n0__873
%"mul_88__871"<FLOAT,[unk__1241,unk__1242,unk__1243]> ⬅️ ::Mul(%"_to_copy_52__871", %"sigmoid_8__871")
4896 | # Cast_3__871
%"model_layers_8_mlp_act_fn_1__864"<FLOAT16,[unk__1241,unk__1242,unk__1243]> ⬅️ ::Cast(%"mul_88__871") {to=10}
4897 | # n0__876
%"tmp__876"<INT64,[2]> ⬅️ ::Shape(%"model.layers.8.mlp.up_proj.weight")
4898 | # n1__876
%"rank__875"<INT64,?> ⬅️ ::Size(%"tmp__876")
4899 | # n1__875
%"int64_2__875"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4900 | # n2__875
%"int64_2_cast__875"<INT64,?> ⬅️ ::CastLike(%"int64_2__875", %"rank__875")
4901 | # n3__875
%"cond__875"<BOOL,?> ⬅️ ::Equal(%"rank__875", %"int64_2_cast__875")
4902 | # n4__875
%"t_61__874"<FLOAT16,[unk__1244,unk__1245]> ⬅️ ::If(%"cond__875") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__875"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__875_383
%"result__875"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.8.mlp.up_proj.weight") {perm=[1, 0]}
return %"result__875"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__875"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__875_384
%"result_0__875"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.8.mlp.up_proj.weight")
return %"result_0__875"<FLOAT16,[14336,4096]>
}}
4903 | # Constant_3__874
%"_val_3__874"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4904 | # n0__877
%"size_0__877"<INT64,[2]> ⬅️ ::Cast(%"_val_3__874") {to=7}
4905 | # n1__877
%"view_177__874"<FLOAT16,[unk__1246,unk__1247]> ⬅️ ::Reshape(%"model_layers_8_post_attention_layernorm_1__790", %"size_0__877")
4906 | # n0__878
%"mm_61__874"<FLOAT16,[unk__1246,unk__1245]> ⬅️ ::MatMul(%"view_177__874", %"t_61__874")
4907 | # Constant_6__874
%"_val_6__874"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4908 | # n0__879
%"size_0__879"<INT64,[3]> ⬅️ ::Cast(%"_val_6__874") {to=7}
4909 | # n1__879
%"model_layers_8_mlp_up_proj_1__864"<FLOAT16,[unk__1248,unk__1249,unk__1250]> ⬅️ ::Reshape(%"mm_61__874", %"size_0__879")
4910 | # n0__880
%"mul_89__864"<FLOAT16,[unk__1251,unk__1252,unk__1253]> ⬅️ ::Mul(%"model_layers_8_mlp_act_fn_1__864", %"model_layers_8_mlp_up_proj_1__864")
4911 | # n0__883
%"tmp__883"<INT64,[2]> ⬅️ ::Shape(%"model.layers.8.mlp.down_proj.weight")
4912 | # n1__883
%"rank__882"<INT64,?> ⬅️ ::Size(%"tmp__883")
4913 | # n1__882
%"int64_2__882"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4914 | # n2__882
%"int64_2_cast__882"<INT64,?> ⬅️ ::CastLike(%"int64_2__882", %"rank__882")
4915 | # n3__882
%"cond__882"<BOOL,?> ⬅️ ::Equal(%"rank__882", %"int64_2_cast__882")
4916 | # n4__882
%"t_62__881"<FLOAT16,[unk__1254,unk__1255]> ⬅️ ::If(%"cond__882") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__882"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__882_385
%"result__882"<FLOAT16,[14336,4096]> ⬅️ ::Transpose(%"model.layers.8.mlp.down_proj.weight") {perm=[1, 0]}
return %"result__882"<FLOAT16,[14336,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__882"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__882_386
%"result_0__882"<FLOAT16,[4096,14336]> ⬅️ ::Identity(%"model.layers.8.mlp.down_proj.weight")
return %"result_0__882"<FLOAT16,[4096,14336]>
}}
4917 | # Constant_3__881
%"_val_3__881"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4918 | # n0__884
%"size_0__884"<INT64,[2]> ⬅️ ::Cast(%"_val_3__881") {to=7}
4919 | # n1__884
%"view_179__881"<FLOAT16,[unk__1256,unk__1257]> ⬅️ ::Reshape(%"mul_89__864", %"size_0__884")
4920 | # n0__885
%"mm_62__881"<FLOAT16,[unk__1256,unk__1255]> ⬅️ ::MatMul(%"view_179__881", %"t_62__881")
4921 | # Constant_6__881
%"_val_6__881"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4922 | # n0__886
%"size_0__886"<INT64,[3]> ⬅️ ::Cast(%"_val_6__881") {to=7}
4923 | # n1__886
%"model_layers_8_mlp_1__790"<FLOAT16,[unk__1258,unk__1259,unk__1260]> ⬅️ ::Reshape(%"mm_62__881", %"size_0__886")
4924 | # n0__887
%"alpha__887"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
4925 | # n1__887
%"alpha_0__887"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__887", %"model_layers_8_mlp_1__790")
4926 | # n2__887
%"other_1__887"<FLOAT16,[unk__1258,unk__1259,unk__1260]> ⬅️ ::Mul(%"model_layers_8_mlp_1__790", %"alpha_0__887")
4927 | # n3__887
%"model_layers_8_1_2__1"<FLOAT16,[unk__1261,128,4096]> ⬅️ ::Add(%"add_51__790", %"other_1__887")
4928 | # Cast_3__889
%"_to_copy_54__889"<FLOAT,[unk__1261,128,4096]> ⬅️ ::Cast(%"model_layers_8_1_2__1") {to=1}
4929 | # Constant_4__889
%"_val_2__889"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
4930 | # Cast_5__889
%"scalar_tensor_default_18__889"<FLOAT,?> ⬅️ ::Cast(%"_val_2__889") {to=1}
4931 | # n0__890
%"pow_19__889"<FLOAT,[unk__1261,128,4096]> ⬅️ ::Pow(%"_to_copy_54__889", %"scalar_tensor_default_18__889")
4932 | # Constant_7__889
%"_val_5__889"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
4933 | # n0__892
%"tmp__892"<INT64,[3]> ⬅️ ::Shape(%"pow_19__889")
4934 | # n1__892
%"tmp_0__892"<INT64,?> ⬅️ ::Size(%"tmp__892")
4935 | # n2__892
%"tmp_1__892"<INT64,?> ⬅️ ::Constant() {value_int=0}
4936 | # n3__892
%"cond__891"<BOOL,?> ⬅️ ::Equal(%"tmp_0__892", %"tmp_1__892")
4937 | # n1__891
%"mean_18__889"<FLOAT,?> ⬅️ ::If(%"cond__891") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__891"<FLOAT,[unk__1261,128,4096]>
),
) {
0 | # n0__891_387
%"result__891"<FLOAT,[unk__1261,128,4096]> ⬅️ ::Identity(%"pow_19__889")
return %"result__891"<FLOAT,[unk__1261,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__891"<FLOAT,?>
),
) {
0 | # n0__893
%"tmp__893"<INT64,[1]> ⬅️ ::Shape(%"_val_5__889")
1 | # n1__893
%"tmp_0__893"<INT64,?> ⬅️ ::Size(%"tmp__893")
2 | # n2__893
%"tmp_1__893"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__893
%"cond_0__891"<BOOL,?> ⬅️ ::Equal(%"tmp_0__893", %"tmp_1__893")
4 | # n1__891_389
%"dim_3__891"<INT64,?> ⬅️ ::If(%"cond_0__891") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__891"<INT64,[1,1]>
),
) {
0 | # n0__891_390
%"int64_0__891"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__891_391
%"dim_1__891"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__889", %"int64_0__891")
return %"dim_1__891"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__891"<INT64,[1]>
),
) {
0 | # n0__891_392
%"dim_2__891"<INT64,[1]> ⬅️ ::Identity(%"_val_5__889")
return %"dim_2__891"<INT64,[1]>
}}
5 | # n2__891
%"result_4__891"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_19__889", %"dim_3__891") {keepdims=1}
return %"result_4__891"<FLOAT,?>
}}
4938 | # Constant_9__889
%"_val_7__889"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
4939 | # n0__894
%"alpha__894"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
4940 | # n1__894
%"alpha_0__894"<FLOAT,?> ⬅️ ::CastLike(%"alpha__894", %"_val_7__889")
4941 | # n2__894
%"other_1__894"<FLOAT,?> ⬅️ ::Mul(%"_val_7__889", %"alpha_0__894")
4942 | # n3__894
%"add_54__889"<FLOAT,?> ⬅️ ::Add(%"mean_18__889", %"other_1__894")
4943 | # n0__895
%"tmp__895"<FLOAT,?> ⬅️ ::Sqrt(%"add_54__889")
4944 | # n1__895
%"rsqrt_18__889"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__895")
4945 | # n0__896
%"mul_90__889"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_54__889", %"rsqrt_18__889")
4946 | # Cast_13__889
%"_to_copy_55__889"<FLOAT16,?> ⬅️ ::Cast(%"mul_90__889") {to=10}
4947 | # n0__897
%"model_layers_9_input_layernorm_1__888"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.9.input_layernorm.weight", %"_to_copy_55__889")
4948 | # n0__901
%"tmp__901"<INT64,[2]> ⬅️ ::Shape(%"model.layers.9.self_attn.q_proj.weight")
4949 | # n1__901
%"rank__900"<INT64,?> ⬅️ ::Size(%"tmp__901")
4950 | # n1__900
%"int64_2__900"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4951 | # n2__900
%"int64_2_cast__900"<INT64,?> ⬅️ ::CastLike(%"int64_2__900", %"rank__900")
4952 | # n3__900
%"cond__900"<BOOL,?> ⬅️ ::Equal(%"rank__900", %"int64_2_cast__900")
4953 | # n4__900
%"t_63__899"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__900") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__900"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__900_393
%"result__900"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.9.self_attn.q_proj.weight") {perm=[1, 0]}
return %"result__900"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__900"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__900_394
%"result_0__900"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.9.self_attn.q_proj.weight")
return %"result_0__900"<FLOAT16,[4096,4096]>
}}
4954 | # Constant_3__899
%"_val_3__899"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4955 | # n0__902
%"size_0__902"<INT64,[2]> ⬅️ ::Cast(%"_val_3__899") {to=7}
4956 | # n1__902
%"view_181__899"<FLOAT16,[unk__1262,unk__1263]> ⬅️ ::Reshape(%"model_layers_9_input_layernorm_1__888", %"size_0__902")
4957 | # n0__903
%"mm_63__899"<FLOAT16,[unk__1262,4096]> ⬅️ ::MatMul(%"view_181__899", %"t_63__899")
4958 | # Constant_6__899
%"_val_6__899"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4959 | # n0__904
%"size_0__904"<INT64,[3]> ⬅️ ::Cast(%"_val_6__899") {to=7}
4960 | # n1__904
%"model_layers_9_self_attn_q_proj_1__898"<FLOAT16,[unk__1264,unk__1265,unk__1266]> ⬅️ ::Reshape(%"mm_63__899", %"size_0__904")
4961 | # n0__907
%"tmp__907"<INT64,[2]> ⬅️ ::Shape(%"model.layers.9.self_attn.k_proj.weight")
4962 | # n1__907
%"rank__906"<INT64,?> ⬅️ ::Size(%"tmp__907")
4963 | # n1__906
%"int64_2__906"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4964 | # n2__906
%"int64_2_cast__906"<INT64,?> ⬅️ ::CastLike(%"int64_2__906", %"rank__906")
4965 | # n3__906
%"cond__906"<BOOL,?> ⬅️ ::Equal(%"rank__906", %"int64_2_cast__906")
4966 | # n4__906
%"t_64__905"<FLOAT16,[unk__1267,unk__1268]> ⬅️ ::If(%"cond__906") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__906"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__906_395
%"result__906"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.9.self_attn.k_proj.weight") {perm=[1, 0]}
return %"result__906"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__906"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__906_396
%"result_0__906"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.9.self_attn.k_proj.weight")
return %"result_0__906"<FLOAT16,[1024,4096]>
}}
4967 | # Constant_3__905
%"_val_3__905"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4968 | # n0__908
%"size_0__908"<INT64,[2]> ⬅️ ::Cast(%"_val_3__905") {to=7}
4969 | # n1__908
%"view_183__905"<FLOAT16,[unk__1269,unk__1270]> ⬅️ ::Reshape(%"model_layers_9_input_layernorm_1__888", %"size_0__908")
4970 | # n0__909
%"mm_64__905"<FLOAT16,[unk__1269,unk__1268]> ⬅️ ::MatMul(%"view_183__905", %"t_64__905")
4971 | # Constant_6__905
%"_val_6__905"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4972 | # n0__910
%"size_0__910"<INT64,[3]> ⬅️ ::Cast(%"_val_6__905") {to=7}
4973 | # n1__910
%"model_layers_9_self_attn_k_proj_1__898"<FLOAT16,[unk__1271,unk__1272,unk__1273]> ⬅️ ::Reshape(%"mm_64__905", %"size_0__910")
4974 | # n0__913
%"tmp__913"<INT64,[2]> ⬅️ ::Shape(%"model.layers.9.self_attn.v_proj.weight")
4975 | # n1__913
%"rank__912"<INT64,?> ⬅️ ::Size(%"tmp__913")
4976 | # n1__912
%"int64_2__912"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
4977 | # n2__912
%"int64_2_cast__912"<INT64,?> ⬅️ ::CastLike(%"int64_2__912", %"rank__912")
4978 | # n3__912
%"cond__912"<BOOL,?> ⬅️ ::Equal(%"rank__912", %"int64_2_cast__912")
4979 | # n4__912
%"t_65__911"<FLOAT16,[unk__1274,unk__1275]> ⬅️ ::If(%"cond__912") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__912"<FLOAT16,[4096,1024]>
),
) {
0 | # n0__912_397
%"result__912"<FLOAT16,[4096,1024]> ⬅️ ::Transpose(%"model.layers.9.self_attn.v_proj.weight") {perm=[1, 0]}
return %"result__912"<FLOAT16,[4096,1024]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__912"<FLOAT16,[1024,4096]>
),
) {
0 | # n0__912_398
%"result_0__912"<FLOAT16,[1024,4096]> ⬅️ ::Identity(%"model.layers.9.self_attn.v_proj.weight")
return %"result_0__912"<FLOAT16,[1024,4096]>
}}
4980 | # Constant_3__911
%"_val_3__911"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
4981 | # n0__914
%"size_0__914"<INT64,[2]> ⬅️ ::Cast(%"_val_3__911") {to=7}
4982 | # n1__914
%"view_185__911"<FLOAT16,[unk__1276,unk__1277]> ⬅️ ::Reshape(%"model_layers_9_input_layernorm_1__888", %"size_0__914")
4983 | # n0__915
%"mm_65__911"<FLOAT16,[unk__1276,unk__1275]> ⬅️ ::MatMul(%"view_185__911", %"t_65__911")
4984 | # Constant_6__911
%"_val_6__911"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
4985 | # n0__916
%"size_0__916"<INT64,[3]> ⬅️ ::Cast(%"_val_6__911") {to=7}
4986 | # n1__916
%"model_layers_9_self_attn_v_proj_1__898"<FLOAT16,[unk__1278,unk__1279,unk__1280]> ⬅️ ::Reshape(%"mm_65__911", %"size_0__916")
4987 | # Constant_61__898
%"_val_8__898"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
4988 | # n0__917
%"size_0__917"<INT64,[4]> ⬅️ ::Cast(%"_val_8__898") {to=7}
4989 | # n1__917
%"view_187__898"<FLOAT16,[unk__1281,unk__1282,unk__1283,unk__1284]> ⬅️ ::Reshape(%"model_layers_9_self_attn_q_proj_1__898", %"size_0__917")
4990 | # Transpose_63__898
%"transpose_36__898"<FLOAT16,[unk__1281,unk__1283,unk__1282,unk__1284]> ⬅️ ::Transpose(%"view_187__898") {perm=[0, 2, 1, 3]}
4991 | # Constant_64__898
%"_val_11__898"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
4992 | # n0__918
%"size_0__918"<INT64,[4]> ⬅️ ::Cast(%"_val_11__898") {to=7}
4993 | # n1__918
%"view_188__898"<FLOAT16,[unk__1285,unk__1286,unk__1287,unk__1288]> ⬅️ ::Reshape(%"model_layers_9_self_attn_k_proj_1__898", %"size_0__918")
4994 | # Transpose_66__898
%"transpose_37__898"<FLOAT16,[unk__1285,unk__1287,unk__1286,unk__1288]> ⬅️ ::Transpose(%"view_188__898") {perm=[0, 2, 1, 3]}
4995 | # Constant_67__898
%"_val_14__898"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
4996 | # n0__919
%"size_0__919"<INT64,[4]> ⬅️ ::Cast(%"_val_14__898") {to=7}
4997 | # n1__919
%"view_189__898"<FLOAT16,[unk__1289,unk__1290,unk__1291,unk__1292]> ⬅️ ::Reshape(%"model_layers_9_self_attn_v_proj_1__898", %"size_0__919")
4998 | # Transpose_69__898
%"model_1_18"<FLOAT16,[1,8,128,128]> ⬅️ ::Transpose(%"view_189__898") {perm=[0, 2, 1, 3]}
4999 | # Constant_8__920
%"_val_1__920"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5000 | # Cast_9__920
%"_val_2__920"<INT64,?> ⬅️ ::Cast(%"_val_1__920") {to=7}
5001 | # Constant_10__920
%"_val_3__920"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5002 | # Reshape_11__920
%"_val_4__920"<INT64,[1]> ⬅️ ::Reshape(%"_val_2__920", %"_val_3__920") {allowzero=0}
5003 | # Constant_12__920
%"_val_5__920"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5004 | # Cast_13__920
%"_val_6__920"<INT64,?> ⬅️ ::Cast(%"_val_5__920") {to=7}
5005 | # Constant_14__920
%"_val_7__920"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5006 | # Reshape_15__920
%"_val_8__920"<INT64,[1]> ⬅️ ::Reshape(%"_val_6__920", %"_val_7__920") {allowzero=0}
5007 | # Constant_16__920
%"_val_9__920"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5008 | # Cast_17__920
%"_val_10__920"<INT64,?> ⬅️ ::Cast(%"_val_9__920") {to=7}
5009 | # Constant_18__920
%"_val_11__920"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5010 | # Reshape_19__920
%"_val_12__920"<INT64,[1]> ⬅️ ::Reshape(%"_val_10__920", %"_val_11__920") {allowzero=0}
5011 | # Constant_20__920
%"_val_13__920"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5012 | # Cast_21__920
%"_val_14__920"<INT64,?> ⬅️ ::Cast(%"_val_13__920") {to=7}
5013 | # Constant_22__920
%"_val_15__920"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5014 | # Reshape_23__920
%"_val_16__920"<INT64,[1]> ⬅️ ::Reshape(%"_val_14__920", %"_val_15__920") {allowzero=0}
5015 | # Slice_24__920
%"model_layers_9_self_attn_rotary_emb_1__898"<FLOAT16,[unk__1293,unk__1294]> ⬅️ ::Slice(%"model.layers.9.self_attn.rotary_emb.cos_cached", %"_val_4__920", %"_val_8__920", %"_val_12__920", %"_val_16__920")
5016 | # Constant_25__920
%"_val_19__920"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5017 | # Cast_26__920
%"_val_20__920"<INT64,?> ⬅️ ::Cast(%"_val_19__920") {to=7}
5018 | # Constant_27__920
%"_val_21__920"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5019 | # Reshape_28__920
%"_val_22__920"<INT64,[1]> ⬅️ ::Reshape(%"_val_20__920", %"_val_21__920") {allowzero=0}
5020 | # Constant_29__920
%"_val_23__920"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5021 | # Cast_30__920
%"_val_24__920"<INT64,?> ⬅️ ::Cast(%"_val_23__920") {to=7}
5022 | # Constant_31__920
%"_val_25__920"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5023 | # Reshape_32__920
%"_val_26__920"<INT64,[1]> ⬅️ ::Reshape(%"_val_24__920", %"_val_25__920") {allowzero=0}
5024 | # Constant_33__920
%"_val_27__920"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5025 | # Cast_34__920
%"_val_28__920"<INT64,?> ⬅️ ::Cast(%"_val_27__920") {to=7}
5026 | # Constant_35__920
%"_val_29__920"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5027 | # Reshape_36__920
%"_val_30__920"<INT64,[1]> ⬅️ ::Reshape(%"_val_28__920", %"_val_29__920") {allowzero=0}
5028 | # Constant_37__920
%"_val_31__920"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5029 | # Cast_38__920
%"_val_32__920"<INT64,?> ⬅️ ::Cast(%"_val_31__920") {to=7}
5030 | # Constant_39__920
%"_val_33__920"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5031 | # Reshape_40__920
%"_val_34__920"<INT64,[1]> ⬅️ ::Reshape(%"_val_32__920", %"_val_33__920") {allowzero=0}
5032 | # Slice_41__920
%"model_layers_9_self_attn_rotary_emb_1_1__898"<FLOAT16,[unk__1295,unk__1296]> ⬅️ ::Slice(%"model.layers.9.self_attn.rotary_emb.sin_cached", %"_val_22__920", %"_val_26__920", %"_val_30__920", %"_val_34__920")
5033 | # Transpose_71__898
%"_val_21__898"<FLOAT16,[unk__1293,unk__1294]> ⬅️ ::Transpose(%"model_layers_9_self_attn_rotary_emb_1__898") {perm=[0, 1]}
5034 | # Max_72__898
%"_val_22__898"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
5035 | # Shape_73__898
%"_val_23__898"<INT64,[2]> ⬅️ ::Shape(%"_val_22__898") {start=0}
5036 | # Expand_74__898
%"_val_24__898"<INT64,[unk__1297,unk__1298]> ⬅️ ::Expand(%"view__1", %"_val_23__898")
5037 | # Constant_75__898
%"_val_25__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5038 | # Unsqueeze_76__898
%"_val_26__898"<INT64,[unk__1297,unk__1298,1]> ⬅️ ::Unsqueeze(%"_val_24__898", %"_val_25__898")
5039 | # Concat_77__898
%"_val_27__898"<INT64,[unk__1297,unk__1298,1]> ⬅️ ::Concat(%"_val_26__898") {axis=-1}
5040 | # GatherND_78__898
%"_val_28__898"<FLOAT16,[unk__1297,unk__1298,unk__1294]> ⬅️ ::GatherND(%"_val_21__898", %"_val_27__898") {batch_dims=0}
5041 | # Transpose_79__898
%"index_18__898"<FLOAT16,[unk__1297,unk__1298,unk__1294]> ⬅️ ::Transpose(%"_val_28__898") {perm=[0, 1, 2]}
5042 | # n0__921
%"dim__921"<INT64,?> ⬅️ ::Constant() {value_int=1}
5043 | # n1__921
%"dim_0__921"<INT64,?> ⬅️ ::Cast(%"dim__921") {to=7}
5044 | # n2__921
%"unsqueeze_37__898"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_18__898", %"dim_0__921")
5045 | # Transpose_81__898
%"_val_31__898"<FLOAT16,[unk__1295,unk__1296]> ⬅️ ::Transpose(%"model_layers_9_self_attn_rotary_emb_1_1__898") {perm=[0, 1]}
5046 | # Max_82__898
%"_val_32__898"<INT64,[unk__1,unk__2]> ⬅️ ::Max(%"view__1")
5047 | # Shape_83__898
%"_val_33__898"<INT64,[2]> ⬅️ ::Shape(%"_val_32__898") {start=0}
5048 | # Expand_84__898
%"_val_34__898"<INT64,[unk__1299,unk__1300]> ⬅️ ::Expand(%"view__1", %"_val_33__898")
5049 | # Constant_85__898
%"_val_35__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5050 | # Unsqueeze_86__898
%"_val_36__898"<INT64,[unk__1299,unk__1300,1]> ⬅️ ::Unsqueeze(%"_val_34__898", %"_val_35__898")
5051 | # Concat_87__898
%"_val_37__898"<INT64,[unk__1299,unk__1300,1]> ⬅️ ::Concat(%"_val_36__898") {axis=-1}
5052 | # GatherND_88__898
%"_val_38__898"<FLOAT16,[unk__1299,unk__1300,unk__1296]> ⬅️ ::GatherND(%"_val_31__898", %"_val_37__898") {batch_dims=0}
5053 | # Transpose_89__898
%"index_19__898"<FLOAT16,[unk__1299,unk__1300,unk__1296]> ⬅️ ::Transpose(%"_val_38__898") {perm=[0, 1, 2]}
5054 | # n0__922
%"dim__922"<INT64,?> ⬅️ ::Constant() {value_int=1}
5055 | # n1__922
%"dim_0__922"<INT64,?> ⬅️ ::Cast(%"dim__922") {to=7}
5056 | # n2__922
%"unsqueeze_38__898"<FLOAT16,?> ⬅️ ::Unsqueeze(%"index_19__898", %"dim_0__922")
5057 | # n0__923
%"mul_92__898"<FLOAT16,?> ⬅️ ::Mul(%"transpose_36__898", %"unsqueeze_37__898")
5058 | # Constant_92__898
%"_val_42__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5059 | # Cast_93__898
%"_val_43__898"<INT64,?> ⬅️ ::Cast(%"_val_42__898") {to=7}
5060 | # Constant_94__898
%"_val_44__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5061 | # Reshape_95__898
%"_val_45__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_43__898", %"_val_44__898") {allowzero=0}
5062 | # Constant_96__898
%"_val_46__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5063 | # Cast_97__898
%"_val_47__898"<INT64,?> ⬅️ ::Cast(%"_val_46__898") {to=7}
5064 | # Constant_98__898
%"_val_48__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5065 | # Reshape_99__898
%"_val_49__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_47__898", %"_val_48__898") {allowzero=0}
5066 | # Constant_100__898
%"_val_50__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5067 | # Cast_101__898
%"_val_51__898"<INT64,?> ⬅️ ::Cast(%"_val_50__898") {to=7}
5068 | # Constant_102__898
%"_val_52__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5069 | # Reshape_103__898
%"_val_53__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_51__898", %"_val_52__898") {allowzero=0}
5070 | # Constant_104__898
%"_val_54__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5071 | # Cast_105__898
%"_val_55__898"<INT64,?> ⬅️ ::Cast(%"_val_54__898") {to=7}
5072 | # Constant_106__898
%"_val_56__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5073 | # Reshape_107__898
%"_val_57__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_55__898", %"_val_56__898") {allowzero=0}
5074 | # Slice_108__898
%"slice_129__898"<FLOAT16,[unk__1301,unk__1302,unk__1303,unk__1304]> ⬅️ ::Slice(%"transpose_36__898", %"_val_45__898", %"_val_49__898", %"_val_53__898", %"_val_57__898")
5075 | # Constant_109__898
%"_val_59__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5076 | # Cast_110__898
%"_val_60__898"<INT64,?> ⬅️ ::Cast(%"_val_59__898") {to=7}
5077 | # Constant_111__898
%"_val_61__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5078 | # Reshape_112__898
%"_val_62__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_60__898", %"_val_61__898") {allowzero=0}
5079 | # Constant_113__898
%"_val_63__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5080 | # Cast_114__898
%"_val_64__898"<INT64,?> ⬅️ ::Cast(%"_val_63__898") {to=7}
5081 | # Constant_115__898
%"_val_65__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5082 | # Reshape_116__898
%"_val_66__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_64__898", %"_val_65__898") {allowzero=0}
5083 | # Constant_117__898
%"_val_67__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5084 | # Cast_118__898
%"_val_68__898"<INT64,?> ⬅️ ::Cast(%"_val_67__898") {to=7}
5085 | # Constant_119__898
%"_val_69__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5086 | # Reshape_120__898
%"_val_70__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_68__898", %"_val_69__898") {allowzero=0}
5087 | # Constant_121__898
%"_val_71__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5088 | # Cast_122__898
%"_val_72__898"<INT64,?> ⬅️ ::Cast(%"_val_71__898") {to=7}
5089 | # Constant_123__898
%"_val_73__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5090 | # Reshape_124__898
%"_val_74__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_72__898", %"_val_73__898") {allowzero=0}
5091 | # Slice_125__898
%"slice_130__898"<FLOAT16,[unk__1305,unk__1306,unk__1307,unk__1308]> ⬅️ ::Slice(%"transpose_36__898", %"_val_62__898", %"_val_66__898", %"_val_70__898", %"_val_74__898")
5092 | # n0__924
%"neg_18__898"<FLOAT16,[unk__1305,unk__1306,unk__1307,unk__1308]> ⬅️ ::Neg(%"slice_130__898")
5093 | # SequenceConstruct_127__898
%"77__898"<Sequence(Tensor(FLOAT16)),[unk__1309,unk__1310,unk__1311,unk__1312]> ⬅️ ::SequenceConstruct(%"neg_18__898", %"slice_129__898")
5094 | # n0__925
%"cat_18__898"<FLOAT16,[unk__1309,unk__1310,unk__1311,unk__1313]> ⬅️ ::ConcatFromSequence(%"77__898") {axis=-1}
5095 | # n0__926
%"mul_93__898"<FLOAT16,?> ⬅️ ::Mul(%"cat_18__898", %"unsqueeze_38__898")
5096 | # n0__927
%"alpha__927"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
5097 | # n1__927
%"alpha_0__927"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__927", %"mul_93__898")
5098 | # n2__927
%"other_1__927"<FLOAT16,?> ⬅️ ::Mul(%"mul_93__898", %"alpha_0__927")
5099 | # n3__927
%"add_55__898"<FLOAT16,?> ⬅️ ::Add(%"mul_92__898", %"other_1__927")
5100 | # n0__928
%"mul_94__898"<FLOAT16,?> ⬅️ ::Mul(%"transpose_37__898", %"unsqueeze_37__898")
5101 | # Constant_132__898
%"_val_82__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5102 | # Cast_133__898
%"_val_83__898"<INT64,?> ⬅️ ::Cast(%"_val_82__898") {to=7}
5103 | # Constant_134__898
%"_val_84__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5104 | # Reshape_135__898
%"_val_85__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_83__898", %"_val_84__898") {allowzero=0}
5105 | # Constant_136__898
%"_val_86__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5106 | # Cast_137__898
%"_val_87__898"<INT64,?> ⬅️ ::Cast(%"_val_86__898") {to=7}
5107 | # Constant_138__898
%"_val_88__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5108 | # Reshape_139__898
%"_val_89__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_87__898", %"_val_88__898") {allowzero=0}
5109 | # Constant_140__898
%"_val_90__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5110 | # Cast_141__898
%"_val_91__898"<INT64,?> ⬅️ ::Cast(%"_val_90__898") {to=7}
5111 | # Constant_142__898
%"_val_92__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5112 | # Reshape_143__898
%"_val_93__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_91__898", %"_val_92__898") {allowzero=0}
5113 | # Constant_144__898
%"_val_94__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5114 | # Cast_145__898
%"_val_95__898"<INT64,?> ⬅️ ::Cast(%"_val_94__898") {to=7}
5115 | # Constant_146__898
%"_val_96__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5116 | # Reshape_147__898
%"_val_97__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_95__898", %"_val_96__898") {allowzero=0}
5117 | # Slice_148__898
%"slice_131__898"<FLOAT16,[unk__1314,unk__1315,unk__1316,unk__1317]> ⬅️ ::Slice(%"transpose_37__898", %"_val_85__898", %"_val_89__898", %"_val_93__898", %"_val_97__898")
5118 | # Constant_149__898
%"_val_99__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5119 | # Cast_150__898
%"_val_100__898"<INT64,?> ⬅️ ::Cast(%"_val_99__898") {to=7}
5120 | # Constant_151__898
%"_val_101__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5121 | # Reshape_152__898
%"_val_102__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_100__898", %"_val_101__898") {allowzero=0}
5122 | # Constant_153__898
%"_val_103__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5123 | # Cast_154__898
%"_val_104__898"<INT64,?> ⬅️ ::Cast(%"_val_103__898") {to=7}
5124 | # Constant_155__898
%"_val_105__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5125 | # Reshape_156__898
%"_val_106__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_104__898", %"_val_105__898") {allowzero=0}
5126 | # Constant_157__898
%"_val_107__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5127 | # Cast_158__898
%"_val_108__898"<INT64,?> ⬅️ ::Cast(%"_val_107__898") {to=7}
5128 | # Constant_159__898
%"_val_109__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5129 | # Reshape_160__898
%"_val_110__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_108__898", %"_val_109__898") {allowzero=0}
5130 | # Constant_161__898
%"_val_111__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5131 | # Cast_162__898
%"_val_112__898"<INT64,?> ⬅️ ::Cast(%"_val_111__898") {to=7}
5132 | # Constant_163__898
%"_val_113__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5133 | # Reshape_164__898
%"_val_114__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_112__898", %"_val_113__898") {allowzero=0}
5134 | # Slice_165__898
%"slice_132__898"<FLOAT16,[unk__1318,unk__1319,unk__1320,unk__1321]> ⬅️ ::Slice(%"transpose_37__898", %"_val_102__898", %"_val_106__898", %"_val_110__898", %"_val_114__898")
5135 | # n0__929
%"neg_19__898"<FLOAT16,[unk__1318,unk__1319,unk__1320,unk__1321]> ⬅️ ::Neg(%"slice_132__898")
5136 | # SequenceConstruct_167__898
%"117__898"<Sequence(Tensor(FLOAT16)),[unk__1322,unk__1323,unk__1324,unk__1325]> ⬅️ ::SequenceConstruct(%"neg_19__898", %"slice_131__898")
5137 | # n0__930
%"cat_19__898"<FLOAT16,[unk__1322,unk__1323,unk__1324,unk__1326]> ⬅️ ::ConcatFromSequence(%"117__898") {axis=-1}
5138 | # n0__931
%"mul_95__898"<FLOAT16,?> ⬅️ ::Mul(%"cat_19__898", %"unsqueeze_38__898")
5139 | # n0__932
%"alpha__932"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
5140 | # n1__932
%"alpha_0__932"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__932", %"mul_95__898")
5141 | # n2__932
%"other_1__932"<FLOAT16,?> ⬅️ ::Mul(%"mul_95__898", %"alpha_0__932")
5142 | # n3__932
%"model_1_19"<FLOAT16,[1,8,128,128]> ⬅️ ::Add(%"mul_94__898", %"other_1__932")
5143 | # Constant_171__898
%"_val_121__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5144 | # Cast_172__898
%"_val_122__898"<INT64,?> ⬅️ ::Cast(%"_val_121__898") {to=7}
5145 | # Constant_173__898
%"_val_123__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5146 | # Reshape_174__898
%"_val_124__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_122__898", %"_val_123__898") {allowzero=0}
5147 | # Constant_175__898
%"_val_125__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5148 | # Cast_176__898
%"_val_126__898"<INT64,?> ⬅️ ::Cast(%"_val_125__898") {to=7}
5149 | # Constant_177__898
%"_val_127__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5150 | # Reshape_178__898
%"_val_128__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_126__898", %"_val_127__898") {allowzero=0}
5151 | # Constant_179__898
%"_val_129__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5152 | # Cast_180__898
%"_val_130__898"<INT64,?> ⬅️ ::Cast(%"_val_129__898") {to=7}
5153 | # Constant_181__898
%"_val_131__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5154 | # Reshape_182__898
%"_val_132__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_130__898", %"_val_131__898") {allowzero=0}
5155 | # Constant_183__898
%"_val_133__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5156 | # Cast_184__898
%"_val_134__898"<INT64,?> ⬅️ ::Cast(%"_val_133__898") {to=7}
5157 | # Constant_185__898
%"_val_135__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5158 | # Reshape_186__898
%"_val_136__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_134__898", %"_val_135__898") {allowzero=0}
5159 | # Slice_187__898
%"slice_133__898"<FLOAT16,[unk__1327,unk__1328,unk__1329,unk__1330]> ⬅️ ::Slice(%"model_1_19", %"_val_124__898", %"_val_128__898", %"_val_132__898", %"_val_136__898")
5160 | # Constant_188__898
%"_val_138__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5161 | # Cast_189__898
%"_val_139__898"<INT64,?> ⬅️ ::Cast(%"_val_138__898") {to=7}
5162 | # Constant_190__898
%"_val_140__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5163 | # Reshape_191__898
%"_val_141__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_139__898", %"_val_140__898") {allowzero=0}
5164 | # Constant_192__898
%"_val_142__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5165 | # Cast_193__898
%"_val_143__898"<INT64,?> ⬅️ ::Cast(%"_val_142__898") {to=7}
5166 | # Constant_194__898
%"_val_144__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5167 | # Reshape_195__898
%"_val_145__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_143__898", %"_val_144__898") {allowzero=0}
5168 | # Constant_196__898
%"_val_146__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5169 | # Cast_197__898
%"_val_147__898"<INT64,?> ⬅️ ::Cast(%"_val_146__898") {to=7}
5170 | # Constant_198__898
%"_val_148__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5171 | # Reshape_199__898
%"_val_149__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_147__898", %"_val_148__898") {allowzero=0}
5172 | # Constant_200__898
%"_val_150__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5173 | # Cast_201__898
%"_val_151__898"<INT64,?> ⬅️ ::Cast(%"_val_150__898") {to=7}
5174 | # Constant_202__898
%"_val_152__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5175 | # Reshape_203__898
%"_val_153__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_151__898", %"_val_152__898") {allowzero=0}
5176 | # Slice_204__898
%"slice_134__898"<FLOAT16,[unk__1331,unk__1332,unk__1333,unk__1334]> ⬅️ ::Slice(%"slice_133__898", %"_val_141__898", %"_val_145__898", %"_val_149__898", %"_val_153__898")
5177 | # n0__933
%"dim__933"<INT64,?> ⬅️ ::Constant() {value_int=2}
5178 | # n1__933
%"dim_0__933"<INT64,?> ⬅️ ::Cast(%"dim__933") {to=7}
5179 | # n2__933
%"unsqueeze_39__898"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_134__898", %"dim_0__933")
5180 | # Constant_206__898
%"_val_156__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5181 | # Cast_207__898
%"_val_157__898"<INT64,?> ⬅️ ::Cast(%"_val_156__898") {to=7}
5182 | # Constant_208__898
%"_val_158__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5183 | # Reshape_209__898
%"_val_159__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_157__898", %"_val_158__898") {allowzero=0}
5184 | # Constant_210__898
%"_val_160__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5185 | # Cast_211__898
%"_val_161__898"<INT64,?> ⬅️ ::Cast(%"_val_160__898") {to=7}
5186 | # Constant_212__898
%"_val_162__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5187 | # Reshape_213__898
%"_val_163__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_161__898", %"_val_162__898") {allowzero=0}
5188 | # Constant_214__898
%"_val_164__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5189 | # Cast_215__898
%"_val_165__898"<INT64,?> ⬅️ ::Cast(%"_val_164__898") {to=7}
5190 | # Constant_216__898
%"_val_166__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5191 | # Reshape_217__898
%"_val_167__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_165__898", %"_val_166__898") {allowzero=0}
5192 | # Constant_218__898
%"_val_168__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5193 | # Cast_219__898
%"_val_169__898"<INT64,?> ⬅️ ::Cast(%"_val_168__898") {to=7}
5194 | # Constant_220__898
%"_val_170__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5195 | # Reshape_221__898
%"_val_171__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_169__898", %"_val_170__898") {allowzero=0}
5196 | # Slice_222__898
%"slice_135__898"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_39__898", %"_val_159__898", %"_val_163__898", %"_val_167__898", %"_val_171__898")
5197 | # Constant_223__898
%"_val_173__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5198 | # Cast_224__898
%"_val_174__898"<INT64,?> ⬅️ ::Cast(%"_val_173__898") {to=7}
5199 | # Constant_225__898
%"_val_175__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5200 | # Reshape_226__898
%"_val_176__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_174__898", %"_val_175__898") {allowzero=0}
5201 | # Constant_227__898
%"_val_177__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5202 | # Cast_228__898
%"_val_178__898"<INT64,?> ⬅️ ::Cast(%"_val_177__898") {to=7}
5203 | # Constant_229__898
%"_val_179__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5204 | # Reshape_230__898
%"_val_180__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_178__898", %"_val_179__898") {allowzero=0}
5205 | # Constant_231__898
%"_val_181__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5206 | # Cast_232__898
%"_val_182__898"<INT64,?> ⬅️ ::Cast(%"_val_181__898") {to=7}
5207 | # Constant_233__898
%"_val_183__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5208 | # Reshape_234__898
%"_val_184__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_182__898", %"_val_183__898") {allowzero=0}
5209 | # Constant_235__898
%"_val_185__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5210 | # Cast_236__898
%"_val_186__898"<INT64,?> ⬅️ ::Cast(%"_val_185__898") {to=7}
5211 | # Constant_237__898
%"_val_187__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5212 | # Reshape_238__898
%"_val_188__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_186__898", %"_val_187__898") {allowzero=0}
5213 | # Slice_239__898
%"slice_136__898"<FLOAT16,?> ⬅️ ::Slice(%"slice_135__898", %"_val_176__898", %"_val_180__898", %"_val_184__898", %"_val_188__898")
5214 | # Constant_240__898
%"_val_190__898"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
5215 | # n0__934
%"size_0__934"<INT64,[5]> ⬅️ ::Cast(%"_val_190__898") {to=7}
5216 | # n1__934
%"size_1__934"<INT64,[5]> ⬅️ ::Abs(%"size_0__934")
5217 | # n2__934
%"expand_18__898"<FLOAT16,?> ⬅️ ::Expand(%"slice_136__898", %"size_1__934")
5218 | # n0__935
%"clone_18__898"<FLOAT16,?> ⬅️ ::Identity(%"expand_18__898")
5219 | # Constant_243__898
%"_val_193__898"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
5220 | # n0__936
%"size_0__936"<INT64,[4]> ⬅️ ::Cast(%"_val_193__898") {to=7}
5221 | # n1__936
%"view_190__898"<FLOAT16,[unk__1335,unk__1336,unk__1337,unk__1338]> ⬅️ ::Reshape(%"clone_18__898", %"size_0__936")
5222 | # Constant_245__898
%"_val_195__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5223 | # Cast_246__898
%"_val_196__898"<INT64,?> ⬅️ ::Cast(%"_val_195__898") {to=7}
5224 | # Constant_247__898
%"_val_197__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5225 | # Reshape_248__898
%"_val_198__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_196__898", %"_val_197__898") {allowzero=0}
5226 | # Constant_249__898
%"_val_199__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5227 | # Cast_250__898
%"_val_200__898"<INT64,?> ⬅️ ::Cast(%"_val_199__898") {to=7}
5228 | # Constant_251__898
%"_val_201__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5229 | # Reshape_252__898
%"_val_202__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_200__898", %"_val_201__898") {allowzero=0}
5230 | # Constant_253__898
%"_val_203__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5231 | # Cast_254__898
%"_val_204__898"<INT64,?> ⬅️ ::Cast(%"_val_203__898") {to=7}
5232 | # Constant_255__898
%"_val_205__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5233 | # Reshape_256__898
%"_val_206__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_204__898", %"_val_205__898") {allowzero=0}
5234 | # Constant_257__898
%"_val_207__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5235 | # Cast_258__898
%"_val_208__898"<INT64,?> ⬅️ ::Cast(%"_val_207__898") {to=7}
5236 | # Constant_259__898
%"_val_209__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5237 | # Reshape_260__898
%"_val_210__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_208__898", %"_val_209__898") {allowzero=0}
5238 | # Slice_261__898
%"slice_137__898"<FLOAT16,[unk__1339,unk__1340,unk__1341,unk__1342]> ⬅️ ::Slice(%"model_1_18", %"_val_198__898", %"_val_202__898", %"_val_206__898", %"_val_210__898")
5239 | # Constant_262__898
%"_val_212__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5240 | # Cast_263__898
%"_val_213__898"<INT64,?> ⬅️ ::Cast(%"_val_212__898") {to=7}
5241 | # Constant_264__898
%"_val_214__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5242 | # Reshape_265__898
%"_val_215__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_213__898", %"_val_214__898") {allowzero=0}
5243 | # Constant_266__898
%"_val_216__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5244 | # Cast_267__898
%"_val_217__898"<INT64,?> ⬅️ ::Cast(%"_val_216__898") {to=7}
5245 | # Constant_268__898
%"_val_218__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5246 | # Reshape_269__898
%"_val_219__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_217__898", %"_val_218__898") {allowzero=0}
5247 | # Constant_270__898
%"_val_220__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5248 | # Cast_271__898
%"_val_221__898"<INT64,?> ⬅️ ::Cast(%"_val_220__898") {to=7}
5249 | # Constant_272__898
%"_val_222__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5250 | # Reshape_273__898
%"_val_223__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_221__898", %"_val_222__898") {allowzero=0}
5251 | # Constant_274__898
%"_val_224__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5252 | # Cast_275__898
%"_val_225__898"<INT64,?> ⬅️ ::Cast(%"_val_224__898") {to=7}
5253 | # Constant_276__898
%"_val_226__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5254 | # Reshape_277__898
%"_val_227__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_225__898", %"_val_226__898") {allowzero=0}
5255 | # Slice_278__898
%"slice_138__898"<FLOAT16,[unk__1343,unk__1344,unk__1345,unk__1346]> ⬅️ ::Slice(%"slice_137__898", %"_val_215__898", %"_val_219__898", %"_val_223__898", %"_val_227__898")
5256 | # n0__937
%"dim__937"<INT64,?> ⬅️ ::Constant() {value_int=2}
5257 | # n1__937
%"dim_0__937"<INT64,?> ⬅️ ::Cast(%"dim__937") {to=7}
5258 | # n2__937
%"unsqueeze_40__898"<FLOAT16,?> ⬅️ ::Unsqueeze(%"slice_138__898", %"dim_0__937")
5259 | # Constant_280__898
%"_val_230__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5260 | # Cast_281__898
%"_val_231__898"<INT64,?> ⬅️ ::Cast(%"_val_230__898") {to=7}
5261 | # Constant_282__898
%"_val_232__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5262 | # Reshape_283__898
%"_val_233__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_231__898", %"_val_232__898") {allowzero=0}
5263 | # Constant_284__898
%"_val_234__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5264 | # Cast_285__898
%"_val_235__898"<INT64,?> ⬅️ ::Cast(%"_val_234__898") {to=7}
5265 | # Constant_286__898
%"_val_236__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5266 | # Reshape_287__898
%"_val_237__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_235__898", %"_val_236__898") {allowzero=0}
5267 | # Constant_288__898
%"_val_238__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5268 | # Cast_289__898
%"_val_239__898"<INT64,?> ⬅️ ::Cast(%"_val_238__898") {to=7}
5269 | # Constant_290__898
%"_val_240__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5270 | # Reshape_291__898
%"_val_241__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_239__898", %"_val_240__898") {allowzero=0}
5271 | # Constant_292__898
%"_val_242__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5272 | # Cast_293__898
%"_val_243__898"<INT64,?> ⬅️ ::Cast(%"_val_242__898") {to=7}
5273 | # Constant_294__898
%"_val_244__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5274 | # Reshape_295__898
%"_val_245__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_243__898", %"_val_244__898") {allowzero=0}
5275 | # Slice_296__898
%"slice_139__898"<FLOAT16,?> ⬅️ ::Slice(%"unsqueeze_40__898", %"_val_233__898", %"_val_237__898", %"_val_241__898", %"_val_245__898")
5276 | # Constant_297__898
%"_val_247__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5277 | # Cast_298__898
%"_val_248__898"<INT64,?> ⬅️ ::Cast(%"_val_247__898") {to=7}
5278 | # Constant_299__898
%"_val_249__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5279 | # Reshape_300__898
%"_val_250__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_248__898", %"_val_249__898") {allowzero=0}
5280 | # Constant_301__898
%"_val_251__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5281 | # Cast_302__898
%"_val_252__898"<INT64,?> ⬅️ ::Cast(%"_val_251__898") {to=7}
5282 | # Constant_303__898
%"_val_253__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5283 | # Reshape_304__898
%"_val_254__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_252__898", %"_val_253__898") {allowzero=0}
5284 | # Constant_305__898
%"_val_255__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5285 | # Cast_306__898
%"_val_256__898"<INT64,?> ⬅️ ::Cast(%"_val_255__898") {to=7}
5286 | # Constant_307__898
%"_val_257__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5287 | # Reshape_308__898
%"_val_258__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_256__898", %"_val_257__898") {allowzero=0}
5288 | # Constant_309__898
%"_val_259__898"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5289 | # Cast_310__898
%"_val_260__898"<INT64,?> ⬅️ ::Cast(%"_val_259__898") {to=7}
5290 | # Constant_311__898
%"_val_261__898"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5291 | # Reshape_312__898
%"_val_262__898"<INT64,[1]> ⬅️ ::Reshape(%"_val_260__898", %"_val_261__898") {allowzero=0}
5292 | # Slice_313__898
%"slice_140__898"<FLOAT16,?> ⬅️ ::Slice(%"slice_139__898", %"_val_250__898", %"_val_254__898", %"_val_258__898", %"_val_262__898")
5293 | # Constant_314__898
%"_val_264__898"<INT64,[5]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[5]>('')}
5294 | # n0__938
%"size_0__938"<INT64,[5]> ⬅️ ::Cast(%"_val_264__898") {to=7}
5295 | # n1__938
%"size_1__938"<INT64,[5]> ⬅️ ::Abs(%"size_0__938")
5296 | # n2__938
%"expand_19__898"<FLOAT16,?> ⬅️ ::Expand(%"slice_140__898", %"size_1__938")
5297 | # n0__939
%"clone_19__898"<FLOAT16,?> ⬅️ ::Identity(%"expand_19__898")
5298 | # Constant_317__898
%"_val_267__898"<INT64,[4]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[4]>('')}
5299 | # n0__940
%"size_0__940"<INT64,[4]> ⬅️ ::Cast(%"_val_267__898") {to=7}
5300 | # n1__940
%"view_191__898"<FLOAT16,[unk__1347,unk__1348,unk__1349,unk__1350]> ⬅️ ::Reshape(%"clone_19__898", %"size_0__940")
5301 | # n0__941
%"tmp__941"<INT64,[unk__1351]> ⬅️ ::Shape(%"add_55__898")
5302 | # n1__941
%"int64_m1__941"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_m1')}
5303 | # n2__941
%"tmp_subscripted__941"<INT64,?> ⬅️ ::Gather(%"tmp__941", %"int64_m1__941") {axis=0}
5304 | # n3__941
%"embedding_size__941"<FLOAT16,?> ⬅️ ::CastLike(%"tmp_subscripted__941", %"add_55__898")
5305 | # n4__941
%"const__941"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
5306 | # n5__941
%"tmp_0__941"<FLOAT16,?> ⬅️ ::Sqrt(%"embedding_size__941")
5307 | # n6__941
%"const_cast__941"<FLOAT16,?> ⬅️ ::CastLike(%"const__941", %"tmp_0__941")
5308 | # n7__941
%"_val_269__898"<FLOAT16,?> ⬅️ ::Div(%"const_cast__941", %"tmp_0__941")
5309 | # CastLike_320__898
%"_val_270__898"<FLOAT16,?> ⬅️ ::CastLike(%"_val_269__898", %"add_55__898")
5310 | # n0__942
%"tmp__942"<INT64,[unk__1352]> ⬅️ ::Shape(%"add_55__898")
5311 | # n1__942
%"int64_0_1d__942"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
5312 | # n2__942
%"int64_1_1d__942"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
5313 | # n3__942
%"int64_m2_1d__942"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
5314 | # n4__942
%"int64_m1_1d__942"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
5315 | # n5__942
%"target_length__942"<INT64,[unk__1353]> ⬅️ ::Slice(%"tmp__942", %"int64_m2_1d__942", %"int64_m1_1d__942", %"int64_0_1d__942", %"int64_1_1d__942")
5316 | # n6__942
%"tmp_0__942"<INT64,[4]> ⬅️ ::Shape(%"view_190__898")
5317 | # n7__942
%"int64_0_1d_1__942"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
5318 | # n8__942
%"int64_1_1d_2__942"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
5319 | # n9__942
%"int64_m2_1d_3__942"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_3')}
5320 | # n10__942
%"int64_m1_1d_4__942"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_4')}
5321 | # n11__942
%"source_length__942"<INT64,[1]> ⬅️ ::Slice(%"tmp_0__942", %"int64_m2_1d_3__942", %"int64_m1_1d_4__942", %"int64_0_1d_1__942", %"int64_1_1d_2__942")
5322 | # n12__942
%"size__942"<INT64,[unk__1354]> ⬅️ ::Concat(%"target_length__942", %"source_length__942") {axis=0}
5323 | # n13__942
%"const__942"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
5324 | # n14__942
%"attn_mask__942"<FLOAT,?> ⬅️ ::Expand(%"const__942", %"size__942")
5325 | # n15__942
%"attn_mask_5__942"<FLOAT,?> ⬅️ ::Trilu(%"attn_mask__942") {upper=0}
5326 | # n16__942
%"const_6__942"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_6')}
5327 | # n17__942
%"const_6_cast__942"<FLOAT,?> ⬅️ ::CastLike(%"const_6__942", %"attn_mask_5__942")
5328 | # n18__942
%"tmp_7__942"<BOOL,?> ⬅️ ::Equal(%"attn_mask_5__942", %"const_6_cast__942")
5329 | # n19__942
%"tmp_8__942"<FLOAT,?> ⬅️ ::Constant() {value_float=-inf}
5330 | # n20__942
%"const_9__942"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
5331 | # n21__942
%"const_9_cast__942"<FLOAT,?> ⬅️ ::CastLike(%"const_9__942", %"tmp_8__942")
5332 | # n22__942
%"attn_mask_10__942"<FLOAT,?> ⬅️ ::Where(%"tmp_7__942", %"tmp_8__942", %"const_9_cast__942")
5333 | # n23__942
%"_val_271__898"<FLOAT16,?> ⬅️ ::CastLike(%"attn_mask_10__942", %"add_55__898")
5334 | # n0__943
%"key_shape__943"<INT64,[4]> ⬅️ ::Shape(%"view_190__898")
5335 | # n1__943
%"int64_0_1d__943"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
5336 | # n2__943
%"int64_1_1d__943"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
5337 | # n3__943
%"int64_m1_1d__943"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
5338 | # n4__943
%"int64_9223372036854775807_1d__943"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_9223372036854775807_1d')}
5339 | # n5__943
%"key_last_dim__943"<INT64,[1]> ⬅️ ::Slice(%"key_shape__943", %"int64_m1_1d__943", %"int64_9223372036854775807_1d__943", %"int64_0_1d__943", %"int64_1_1d__943")
5340 | # n6__943
%"int64_0_1d_0__943"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_0')}
5341 | # n7__943
%"int64_1_1d_1__943"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_1')}
5342 | # n8__943
%"int64_m2_1d__943"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
5343 | # n9__943
%"int64_m1_1d_2__943"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d_2')}
5344 | # n10__943
%"key_second_last_dim__943"<INT64,[1]> ⬅️ ::Slice(%"key_shape__943", %"int64_m2_1d__943", %"int64_m1_1d_2__943", %"int64_0_1d_0__943", %"int64_1_1d_1__943")
5345 | # n11__943
%"int64_0_1d_3__943"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
5346 | # n12__943
%"int64_1_1d_4__943"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
5347 | # n13__943
%"int64_m2_1d_5__943"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d_5')}
5348 | # n14__943
%"key_first_dims__943"<INT64,[2]> ⬅️ ::Slice(%"key_shape__943", %"int64_0_1d_3__943", %"int64_m2_1d_5__943", %"int64_0_1d_3__943", %"int64_1_1d_4__943")
5349 | # n15__943
%"tmp__943"<INT64,[1]> ⬅️ ::Constant() {value_ints=[-1]}
5350 | # n16__943
%"key_squeezed_shape__943"<INT64,[3]> ⬅️ ::Concat(%"tmp__943", %"key_second_last_dim__943", %"key_last_dim__943") {axis=0}
5351 | # n17__943
%"key_squeezed__943"<FLOAT16,[unk__1355,unk__1356,unk__1357]> ⬅️ ::Reshape(%"view_190__898", %"key_squeezed_shape__943")
5352 | # n18__943
%"key_squeezed_transposed__943"<FLOAT16,[unk__1355,unk__1357,unk__1356]> ⬅️ ::Transpose(%"key_squeezed__943") {perm=[0, 2, 1]}
5353 | # n19__943
%"key_transposed_shape__943"<INT64,[4]> ⬅️ ::Concat(%"key_first_dims__943", %"key_last_dim__943", %"key_second_last_dim__943") {axis=0}
5354 | # n20__943
%"key_transposed__943"<FLOAT16,[unk__1358,unk__1359,unk__1360,unk__1361]> ⬅️ ::Reshape(%"key_squeezed_transposed__943", %"key_transposed_shape__943")
5355 | # n21__943
%"tmp_6__943"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__898")
5356 | # n22__943
%"query_scaled__943"<FLOAT16,?> ⬅️ ::Mul(%"add_55__898", %"tmp_6__943")
5357 | # n23__943
%"tmp_7__943"<FLOAT16,?> ⬅️ ::Sqrt(%"_val_270__898")
5358 | # n24__943
%"key_transposed_scaled__943"<FLOAT16,[unk__1358,unk__1359,unk__1360,unk__1361]> ⬅️ ::Mul(%"key_transposed__943", %"tmp_7__943")
5359 | # n25__943
%"tmp_8__943"<FLOAT16,?> ⬅️ ::MatMul(%"query_scaled__943", %"key_transposed_scaled__943")
5360 | # n26__943
%"tmp_9__943"<FLOAT16,?> ⬅️ ::Add(%"tmp_8__943", %"_val_271__898")
5361 | # n27__943
%"attn_weight__943"<FLOAT16,?> ⬅️ ::Softmax(%"tmp_9__943") {axis=-1}
5362 | # n28__943
%"dropout_p__943"<FLOAT,?> ⬅️ ::Constant() {value_float=0.0}
5363 | # n29__943
%"attn_weight_10__943"<FLOAT16,?>, %"___943"<BOOL,?> ⬅️ ::Dropout(%"attn_weight__943", %"dropout_p__943")
5364 | # n30__943
%"_scaled_dot_product_efficient_attention_9__898"<FLOAT16,?> ⬅️ ::MatMul(%"attn_weight_10__943", %"view_191__898")
5365 | # n0__944
%"query_0__944"<FLOAT16,?> ⬅️ ::Transpose(%"add_55__898") {perm=[0, 2, 1, 3]}
5366 | # n1__944
%"query_shape__944"<INT64,[unk__1362]> ⬅️ ::Shape(%"query_0__944")
5367 | # n2__944
%"int64_0_1d__944"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d')}
5368 | # n3__944
%"int64_1_1d__944"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d')}
5369 | # n4__944
%"query_first_dims__944"<INT64,[unk__1363]> ⬅️ ::Slice(%"query_shape__944", %"int64_0_1d__944", %"int64_1_1d__944", %"int64_0_1d__944", %"int64_1_1d__944")
5370 | # n5__944
%"int64_0_1d_1__944"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_1')}
5371 | # n6__944
%"int64_1_1d_2__944"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_2')}
5372 | # n7__944
%"int64_2_1d__944"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_2_1d')}
5373 | # n8__944
%"query_second_dims__944"<INT64,[unk__1364]> ⬅️ ::Slice(%"query_shape__944", %"int64_1_1d_2__944", %"int64_2_1d__944", %"int64_0_1d_1__944", %"int64_1_1d_2__944")
5374 | # n9__944
%"int64_0_1d_3__944"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_3')}
5375 | # n10__944
%"int64_1_1d_4__944"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_1_1d_4')}
5376 | # n11__944
%"int64_m2_1d__944"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m2_1d')}
5377 | # n12__944
%"int64_m1_1d__944"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_m1_1d')}
5378 | # n13__944
%"num_heads__944"<INT64,[unk__1365]> ⬅️ ::Slice(%"query_shape__944", %"int64_m2_1d__944", %"int64_m1_1d__944", %"int64_0_1d_3__944", %"int64_1_1d_4__944")
5379 | # n14__944
%"compute_log_sumexp__944"<INT64,?> ⬅️ ::Constant() {value_int=0}
5380 | # n15__944
%"compute_log_sumexp_as_bool__944"<BOOL,?> ⬅️ ::Cast(%"compute_log_sumexp__944") {to=9}
5381 | # n16__944
%"_scaled_dot_product_efficient_attention_9_1__898"<FLOAT,?> ⬅️ ::If(%"compute_log_sumexp_as_bool__944") {then_branch=
graph(
name=thenGraph_14,
inputs=(
),
outputs=(
%"logsum_exp__944"<FLOAT,?>
),
) {
0 | # n0__944_399
%"tmp__944"<FLOAT,[unk__1364]> ⬅️ ::Cast(%"query_second_dims__944") {to=1}
1 | # n1__944_400
%"const__944"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const')}
2 | # n2__944_401
%"const_cast__944"<FLOAT,?> ⬅️ ::CastLike(%"const__944", %"tmp__944")
3 | # n3__944_402
%"tmp_5__944"<FLOAT,[unk__1364]> ⬅️ ::Div(%"tmp__944", %"const_cast__944")
4 | # n4__944_403
%"tmp_6__944"<FLOAT,[unk__1364]> ⬅️ ::Ceil(%"tmp_5__944")
5 | # n5__944_404
%"const_7__944"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_7')}
6 | # n6__944_405
%"const_7_cast__944"<FLOAT,?> ⬅️ ::CastLike(%"const_7__944", %"tmp_6__944")
7 | # n7__944_406
%"tmp_8__944"<FLOAT,[unk__1364]> ⬅️ ::Mul(%"tmp_6__944", %"const_7_cast__944")
8 | # n8__944_407
%"logsumexp_dim__944"<INT64,[unk__1364]> ⬅️ ::Cast(%"tmp_8__944") {to=7}
9 | # n9__944_408
%"const_9__944"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_9')}
10 | # n10__944_409
%"tmp_10__944"<INT64,[unk__1366]> ⬅️ ::Concat(%"query_first_dims__944", %"num_heads__944", %"logsumexp_dim__944") {axis=0}
11 | # n11__944_410
%"logsum_exp__944"<FLOAT,?> ⬅️ ::Expand(%"const_9__944", %"tmp_10__944")
return %"logsum_exp__944"<FLOAT,?>
}, else_branch=
graph(
name=elseGraph_14,
inputs=(
),
outputs=(
%"logsum_exp_14__944"<FLOAT,?>
),
) {
0 | # n0__944_411
%"const_11__944"<FLOAT,?> ⬅️ ::Constant() {value=FloatDataTensor<FLOAT,[]>('const_11')}
1 | # n1__944_412
%"int64_0_1d_12__944"<INT64,[1]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[1]>('int64_0_1d_12')}
2 | # n2__944_413
%"int64_0_1d_12_cast__944"<INT64,[1]> ⬅️ ::CastLike(%"int64_0_1d_12__944", %"num_heads__944")
3 | # n3__944_414
%"tmp_13__944"<INT64,[unk__1367]> ⬅️ ::Concat(%"query_first_dims__944", %"num_heads__944", %"int64_0_1d_12_cast__944") {axis=0}
4 | # n4__944_415
%"logsum_exp_14__944"<FLOAT,?> ⬅️ ::Expand(%"const_11__944", %"tmp_13__944")
return %"logsum_exp_14__944"<FLOAT,?>
}}
5382 | # n17__944
%"tmp_16__944"<INT64,[0]> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[0]>('Empty_INTS')}
5383 | # n18__944
%"tmp_17__944"<FLOAT,?> ⬅️ ::ConstantOfShape(%"tmp_16__944")
5384 | # n19__944
%"_scaled_dot_product_efficient_attention_9_3__898"<INT64,?> ⬅️ ::Cast(%"tmp_17__944") {to=7}
5385 | # Transpose_324__898
%"transpose_39__898"<FLOAT16,?> ⬅️ ::Transpose(%"_scaled_dot_product_efficient_attention_9__898") {perm=[0, 2, 1, 3]}
5386 | # Constant_325__898
%"_val_276__898"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
5387 | # n0__945
%"size_0__945"<INT64,[3]> ⬅️ ::Cast(%"_val_276__898") {to=7}
5388 | # n1__945
%"view_192__898"<FLOAT16,[unk__1368,unk__1369,unk__1370]> ⬅️ ::Reshape(%"transpose_39__898", %"size_0__945")
5389 | # n0__948
%"tmp__948"<INT64,[2]> ⬅️ ::Shape(%"model.layers.9.self_attn.o_proj.weight")
5390 | # n1__948
%"rank__947"<INT64,?> ⬅️ ::Size(%"tmp__948")
5391 | # n1__947
%"int64_2__947"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
5392 | # n2__947
%"int64_2_cast__947"<INT64,?> ⬅️ ::CastLike(%"int64_2__947", %"rank__947")
5393 | # n3__947
%"cond__947"<BOOL,?> ⬅️ ::Equal(%"rank__947", %"int64_2_cast__947")
5394 | # n4__947
%"t_66__946"<FLOAT16,[4096,4096]> ⬅️ ::If(%"cond__947") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__947"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__947_416
%"result__947"<FLOAT16,[4096,4096]> ⬅️ ::Transpose(%"model.layers.9.self_attn.o_proj.weight") {perm=[1, 0]}
return %"result__947"<FLOAT16,[4096,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__947"<FLOAT16,[4096,4096]>
),
) {
0 | # n0__947_417
%"result_0__947"<FLOAT16,[4096,4096]> ⬅️ ::Identity(%"model.layers.9.self_attn.o_proj.weight")
return %"result_0__947"<FLOAT16,[4096,4096]>
}}
5395 | # Constant_3__946
%"_val_3__946"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
5396 | # n0__949
%"size_0__949"<INT64,[2]> ⬅️ ::Cast(%"_val_3__946") {to=7}
5397 | # n1__949
%"view_193__946"<FLOAT16,[unk__1371,unk__1372]> ⬅️ ::Reshape(%"view_192__898", %"size_0__949")
5398 | # n0__950
%"mm_66__946"<FLOAT16,[unk__1371,4096]> ⬅️ ::MatMul(%"view_193__946", %"t_66__946")
5399 | # Constant_6__946
%"_val_6__946"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
5400 | # n0__951
%"size_0__951"<INT64,[3]> ⬅️ ::Cast(%"_val_6__946") {to=7}
5401 | # n1__951
%"model_layers_9_self_attn_1_2__888"<FLOAT16,[unk__1373,unk__1374,unk__1375]> ⬅️ ::Reshape(%"mm_66__946", %"size_0__951")
5402 | # n0__952
%"alpha__952"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
5403 | # n1__952
%"alpha_0__952"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__952", %"model_layers_9_self_attn_1_2__888")
5404 | # n2__952
%"other_1__952"<FLOAT16,[unk__1373,unk__1374,unk__1375]> ⬅️ ::Mul(%"model_layers_9_self_attn_1_2__888", %"alpha_0__952")
5405 | # n3__952
%"add_57__888"<FLOAT16,[unk__1376,128,4096]> ⬅️ ::Add(%"model_layers_8_1_2__1", %"other_1__952")
5406 | # Cast_3__953
%"_to_copy_56__953"<FLOAT,[unk__1376,128,4096]> ⬅️ ::Cast(%"add_57__888") {to=1}
5407 | # Constant_4__953
%"_val_2__953"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5408 | # Cast_5__953
%"scalar_tensor_default_19__953"<FLOAT,?> ⬅️ ::Cast(%"_val_2__953") {to=1}
5409 | # n0__954
%"pow_20__953"<FLOAT,[unk__1376,128,4096]> ⬅️ ::Pow(%"_to_copy_56__953", %"scalar_tensor_default_19__953")
5410 | # Constant_7__953
%"_val_5__953"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
5411 | # n0__956
%"tmp__956"<INT64,[3]> ⬅️ ::Shape(%"pow_20__953")
5412 | # n1__956
%"tmp_0__956"<INT64,?> ⬅️ ::Size(%"tmp__956")
5413 | # n2__956
%"tmp_1__956"<INT64,?> ⬅️ ::Constant() {value_int=0}
5414 | # n3__956
%"cond__955"<BOOL,?> ⬅️ ::Equal(%"tmp_0__956", %"tmp_1__956")
5415 | # n1__955
%"mean_19__953"<FLOAT,?> ⬅️ ::If(%"cond__955") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__955"<FLOAT,[unk__1376,128,4096]>
),
) {
0 | # n0__955_418
%"result__955"<FLOAT,[unk__1376,128,4096]> ⬅️ ::Identity(%"pow_20__953")
return %"result__955"<FLOAT,[unk__1376,128,4096]>
}, else_branch=
graph(
name=elseGraph_5,
inputs=(
),
outputs=(
%"result_4__955"<FLOAT,?>
),
) {
0 | # n0__957
%"tmp__957"<INT64,[1]> ⬅️ ::Shape(%"_val_5__953")
1 | # n1__957
%"tmp_0__957"<INT64,?> ⬅️ ::Size(%"tmp__957")
2 | # n2__957
%"tmp_1__957"<INT64,?> ⬅️ ::Constant() {value_int=0}
3 | # n3__957
%"cond_0__955"<BOOL,?> ⬅️ ::Equal(%"tmp_0__957", %"tmp_1__957")
4 | # n1__955_420
%"dim_3__955"<INT64,?> ⬅️ ::If(%"cond_0__955") {then_branch=
graph(
name=thenGraph_8,
inputs=(
),
outputs=(
%"dim_1__955"<INT64,[1,1]>
),
) {
0 | # n0__955_421
%"int64_0__955"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_0')}
1 | # n1__955_422
%"dim_1__955"<INT64,[1,1]> ⬅️ ::Unsqueeze(%"_val_5__953", %"int64_0__955")
return %"dim_1__955"<INT64,[1,1]>
}, else_branch=
graph(
name=elseGraph_8,
inputs=(
),
outputs=(
%"dim_2__955"<INT64,[1]>
),
) {
0 | # n0__955_423
%"dim_2__955"<INT64,[1]> ⬅️ ::Identity(%"_val_5__953")
return %"dim_2__955"<INT64,[1]>
}}
5 | # n2__955
%"result_4__955"<FLOAT,?> ⬅️ ::ReduceMean(%"pow_20__953", %"dim_3__955") {keepdims=1}
return %"result_4__955"<FLOAT,?>
}}
5416 | # Constant_9__953
%"_val_7__953"<FLOAT,?> ⬅️ ::Constant() {value=TensorProtoTensor<FLOAT,[]>('')}
5417 | # n0__958
%"alpha__958"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
5418 | # n1__958
%"alpha_0__958"<FLOAT,?> ⬅️ ::CastLike(%"alpha__958", %"_val_7__953")
5419 | # n2__958
%"other_1__958"<FLOAT,?> ⬅️ ::Mul(%"_val_7__953", %"alpha_0__958")
5420 | # n3__958
%"add_58__953"<FLOAT,?> ⬅️ ::Add(%"mean_19__953", %"other_1__958")
5421 | # n0__959
%"tmp__959"<FLOAT,?> ⬅️ ::Sqrt(%"add_58__953")
5422 | # n1__959
%"rsqrt_19__953"<FLOAT,?> ⬅️ ::Reciprocal(%"tmp__959")
5423 | # n0__960
%"mul_96__953"<FLOAT,?> ⬅️ ::Mul(%"_to_copy_56__953", %"rsqrt_19__953")
5424 | # Cast_13__953
%"_to_copy_57__953"<FLOAT16,?> ⬅️ ::Cast(%"mul_96__953") {to=10}
5425 | # n0__961
%"model_layers_9_post_attention_layernorm_1__888"<FLOAT16,?> ⬅️ ::Mul(%"model.layers.9.post_attention_layernorm.weight", %"_to_copy_57__953")
5426 | # n0__965
%"tmp__965"<INT64,[2]> ⬅️ ::Shape(%"model.layers.9.mlp.gate_proj.weight")
5427 | # n1__965
%"rank__964"<INT64,?> ⬅️ ::Size(%"tmp__965")
5428 | # n1__964
%"int64_2__964"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
5429 | # n2__964
%"int64_2_cast__964"<INT64,?> ⬅️ ::CastLike(%"int64_2__964", %"rank__964")
5430 | # n3__964
%"cond__964"<BOOL,?> ⬅️ ::Equal(%"rank__964", %"int64_2_cast__964")
5431 | # n4__964
%"t_67__963"<FLOAT16,[unk__1377,unk__1378]> ⬅️ ::If(%"cond__964") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__964"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__964_424
%"result__964"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.9.mlp.gate_proj.weight") {perm=[1, 0]}
return %"result__964"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__964"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__964_425
%"result_0__964"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.9.mlp.gate_proj.weight")
return %"result_0__964"<FLOAT16,[14336,4096]>
}}
5432 | # Constant_3__963
%"_val_3__963"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
5433 | # n0__966
%"size_0__966"<INT64,[2]> ⬅️ ::Cast(%"_val_3__963") {to=7}
5434 | # n1__966
%"view_195__963"<FLOAT16,[unk__1379,unk__1380]> ⬅️ ::Reshape(%"model_layers_9_post_attention_layernorm_1__888", %"size_0__966")
5435 | # n0__967
%"mm_67__963"<FLOAT16,[unk__1379,unk__1378]> ⬅️ ::MatMul(%"view_195__963", %"t_67__963")
5436 | # Constant_6__963
%"_val_6__963"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
5437 | # n0__968
%"size_0__968"<INT64,[3]> ⬅️ ::Cast(%"_val_6__963") {to=7}
5438 | # n1__968
%"model_layers_9_mlp_gate_proj_1__962"<FLOAT16,[unk__1381,unk__1382,unk__1383]> ⬅️ ::Reshape(%"mm_67__963", %"size_0__968")
5439 | # Cast_0__969
%"_to_copy_58__969"<FLOAT,[unk__1381,unk__1382,unk__1383]> ⬅️ ::Cast(%"model_layers_9_mlp_gate_proj_1__962") {to=1}
5440 | # n0__970
%"sigmoid_9__969"<FLOAT,[unk__1381,unk__1382,unk__1383]> ⬅️ ::Sigmoid(%"_to_copy_58__969")
5441 | # n0__971
%"mul_98__969"<FLOAT,[unk__1381,unk__1382,unk__1383]> ⬅️ ::Mul(%"_to_copy_58__969", %"sigmoid_9__969")
5442 | # Cast_3__969
%"model_layers_9_mlp_act_fn_1__962"<FLOAT16,[unk__1381,unk__1382,unk__1383]> ⬅️ ::Cast(%"mul_98__969") {to=10}
5443 | # n0__974
%"tmp__974"<INT64,[2]> ⬅️ ::Shape(%"model.layers.9.mlp.up_proj.weight")
5444 | # n1__974
%"rank__973"<INT64,?> ⬅️ ::Size(%"tmp__974")
5445 | # n1__973
%"int64_2__973"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
5446 | # n2__973
%"int64_2_cast__973"<INT64,?> ⬅️ ::CastLike(%"int64_2__973", %"rank__973")
5447 | # n3__973
%"cond__973"<BOOL,?> ⬅️ ::Equal(%"rank__973", %"int64_2_cast__973")
5448 | # n4__973
%"t_68__972"<FLOAT16,[unk__1384,unk__1385]> ⬅️ ::If(%"cond__973") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__973"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__973_426
%"result__973"<FLOAT16,[4096,14336]> ⬅️ ::Transpose(%"model.layers.9.mlp.up_proj.weight") {perm=[1, 0]}
return %"result__973"<FLOAT16,[4096,14336]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__973"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__973_427
%"result_0__973"<FLOAT16,[14336,4096]> ⬅️ ::Identity(%"model.layers.9.mlp.up_proj.weight")
return %"result_0__973"<FLOAT16,[14336,4096]>
}}
5449 | # Constant_3__972
%"_val_3__972"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
5450 | # n0__975
%"size_0__975"<INT64,[2]> ⬅️ ::Cast(%"_val_3__972") {to=7}
5451 | # n1__975
%"view_197__972"<FLOAT16,[unk__1386,unk__1387]> ⬅️ ::Reshape(%"model_layers_9_post_attention_layernorm_1__888", %"size_0__975")
5452 | # n0__976
%"mm_68__972"<FLOAT16,[unk__1386,unk__1385]> ⬅️ ::MatMul(%"view_197__972", %"t_68__972")
5453 | # Constant_6__972
%"_val_6__972"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
5454 | # n0__977
%"size_0__977"<INT64,[3]> ⬅️ ::Cast(%"_val_6__972") {to=7}
5455 | # n1__977
%"model_layers_9_mlp_up_proj_1__962"<FLOAT16,[unk__1388,unk__1389,unk__1390]> ⬅️ ::Reshape(%"mm_68__972", %"size_0__977")
5456 | # n0__978
%"mul_99__962"<FLOAT16,[unk__1391,unk__1392,unk__1393]> ⬅️ ::Mul(%"model_layers_9_mlp_act_fn_1__962", %"model_layers_9_mlp_up_proj_1__962")
5457 | # n0__981
%"tmp__981"<INT64,[2]> ⬅️ ::Shape(%"model.layers.9.mlp.down_proj.weight")
5458 | # n1__981
%"rank__980"<INT64,?> ⬅️ ::Size(%"tmp__981")
5459 | # n1__980
%"int64_2__980"<INT64,?> ⬅️ ::Constant() {value=Int64DataTensor<INT64,[]>('int64_2')}
5460 | # n2__980
%"int64_2_cast__980"<INT64,?> ⬅️ ::CastLike(%"int64_2__980", %"rank__980")
5461 | # n3__980
%"cond__980"<BOOL,?> ⬅️ ::Equal(%"rank__980", %"int64_2_cast__980")
5462 | # n4__980
%"t_69__979"<FLOAT16,[unk__1394,unk__1395]> ⬅️ ::If(%"cond__980") {then_branch=
graph(
name=thenGraph_6,
inputs=(
),
outputs=(
%"result__980"<FLOAT16,[14336,4096]>
),
) {
0 | # n0__980_428
%"result__980"<FLOAT16,[14336,4096]> ⬅️ ::Transpose(%"model.layers.9.mlp.down_proj.weight") {perm=[1, 0]}
return %"result__980"<FLOAT16,[14336,4096]>
}, else_branch=
graph(
name=elseGraph_6,
inputs=(
),
outputs=(
%"result_0__980"<FLOAT16,[4096,14336]>
),
) {
0 | # n0__980_429
%"result_0__980"<FLOAT16,[4096,14336]> ⬅️ ::Identity(%"model.layers.9.mlp.down_proj.weight")
return %"result_0__980"<FLOAT16,[4096,14336]>
}}
5463 | # Constant_3__979
%"_val_3__979"<INT64,[2]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[2]>('')}
5464 | # n0__982
%"size_0__982"<INT64,[2]> ⬅️ ::Cast(%"_val_3__979") {to=7}
5465 | # n1__982
%"view_199__979"<FLOAT16,[unk__1396,unk__1397]> ⬅️ ::Reshape(%"mul_99__962", %"size_0__982")
5466 | # n0__983
%"mm_69__979"<FLOAT16,[unk__1396,unk__1395]> ⬅️ ::MatMul(%"view_199__979", %"t_69__979")
5467 | # Constant_6__979
%"_val_6__979"<INT64,[3]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[3]>('')}
5468 | # n0__984
%"size_0__984"<INT64,[3]> ⬅️ ::Cast(%"_val_6__979") {to=7}
5469 | # n1__984
%"model_layers_9_mlp_1__888"<FLOAT16,[unk__1398,unk__1399,unk__1400]> ⬅️ ::Reshape(%"mm_69__979", %"size_0__984")
5470 | # n0__985
%"alpha__985"<FLOAT,?> ⬅️ ::Constant() {value_float=1.0}
5471 | # n1__985
%"alpha_0__985"<FLOAT16,?> ⬅️ ::CastLike(%"alpha__985", %"model_layers_9_mlp_1__888")
5472 | # n2__985
%"other_1__985"<FLOAT16,[unk__1398,unk__1399,unk__1400]> ⬅️ ::Mul(%"model_layers_9_mlp_1__888", %"alpha_0__985")
5473 | # n3__985
%"model_layers_9_1_2__1"<FLOAT16,[unk__1401,128,4096]> ⬅️ ::Add(%"add_57__888", %"other_1__985")
5474 | # Cast_3__987
%"_to_copy_60__987"<FLOAT,[unk__1401,128,4096]> ⬅️ ::Cast(%"model_layers_9_1_2__1") {to=1}
5475 | # Constant_4__987
%"_val_2__987"<INT64,?> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[]>('')}
5476 | # Cast_5__987
%"scalar_tensor_default_20__987"<FLOAT,?> ⬅️ ::Cast(%"_val_2__987") {to=1}
5477 | # n0__988
%"pow_21__987"<FLOAT,[unk__1401,128,4096]> ⬅️ ::Pow(%"_to_copy_60__987", %"scalar_tensor_default_20__987")
5478 | # Constant_7__987
%"_val_5__987"<INT64,[1]> ⬅️ ::Constant() {value=TensorProtoTensor<INT64,[1]>('')}
5479 | # n0__990
%"tmp__990"<INT64,[3]> ⬅️ ::Shape(%"pow_21__987")
5480 | # n1__990
%"tmp_0__990"<INT64,?> ⬅️ ::Size(%"tmp__990")
5481 | # n2__990
%"tmp_1__990"<INT64,?> ⬅️ ::Constant() {value_int=0}
5482 | # n3__990
%"cond__989"<BOOL,?> ⬅️ ::Equal(%"tmp_0__990", %"tmp_1__990")
5483 | # n1__989
%"mean_20__987"<FLOAT,?> ⬅️ ::If(%"cond__989") {then_branch=
graph(
name=thenGraph_5,
inputs=(
),
outputs=(
%"result__989"<FLOAT,[unk__1401,128,4096]>
),
) {
0 | # n0__989_430
%"result__989"<FLOAT,[unk__1401,128,4096]> ⬅️ ::Identity(%"pow_21__987")
return %"result__989"<FLOAT,[unk__1401,128,4096]>
}, else_branch=
graph(
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment