Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created February 11, 2022 13:08
Show Gist options
  • Save pashu123/7b17c6ab89fc9b3f06257e05e3ce6272 to your computer and use it in GitHub Desktop.
Save pashu123/7b17c6ab89fc9b3f06257e05e3ce6272 to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
// -----// IR Dump After SymbolDCE //----- //
module attributes {torch.debug_module_name = "GraphModule"} {
func private @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule.forward(%arg0: !torch.nn.Module<"__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule">, %arg1: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg2: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg3: !torch.tensor {torch.type_bound = !torch.vtensor<[512,768],f32>}, %arg4: !torch.tensor {torch.type_bound = !torch.vtensor<[2,768],f32>}, %arg5: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg6: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg7: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg8: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg9: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg10: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg11: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg12: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg13: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg14: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg15: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg16: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg17: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg18: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg19: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg20: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg21: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg22: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg23: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg24: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg25: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg26: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768],f32>}, %arg27: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg28: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg29: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg30: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg31: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg32: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg33: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg34: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg35: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg36: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg37: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg38: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg39: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg40: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg41: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg42: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg43: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg44: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg45: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg46: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg47: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg48: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg49: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg50: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg51: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg52: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg53: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg54: !torch.tensor 
{torch.type_bound = !torch.vtensor<[768],f32>}, %arg55: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg56: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg57: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg58: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg59: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg60: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg61: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg62: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg63: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg64: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg65: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg66: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg67: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg68: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg69: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg70: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg71: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg72: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg73: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg74: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg75: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg76: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg77: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg78: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg79: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg80: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg81: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg82: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg83: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg84: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg85: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg86: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg87: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg88: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg89: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg90: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg91: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg92: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg93: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg94: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg95: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg96: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg97: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg98: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg99: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg100: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg101: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg102: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg103: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg104: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg105: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg106: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg107: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg108: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg109: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768,768],f32>}, %arg110: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg111: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg112: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg113: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg114: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg115: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg116: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg117: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg118: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg119: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg120: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg121: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg122: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg123: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg124: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg125: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg126: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg127: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg128: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg129: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg130: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg131: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg132: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg133: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg134: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg135: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg136: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg137: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg138: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg139: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg140: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg141: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg142: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg143: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg144: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg145: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg146: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg147: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg148: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg149: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg150: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg151: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg152: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg153: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg154: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg155: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg156: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg157: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg158: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg159: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg160: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg161: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg162: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg163: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg164: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768],f32>}, %arg165: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg166: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg167: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg168: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg169: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg170: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg171: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg172: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg173: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg174: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg175: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg176: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg177: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg178: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg179: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg180: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg181: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg182: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg183: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg184: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg185: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg186: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg187: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg188: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg189: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg190: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg191: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, 
%arg192: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg193: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg194: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg195: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg196: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg197: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg198: !torch.tensor {torch.type_bound = !torch.vtensor<[30522],f32>}, %arg199: !torch.tensor {torch.type_bound = !torch.vtensor<[30522],f32>}, %arg200: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg201: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg202: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg203: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg204: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg205: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg206: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg207: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}, %arg208: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}) -> !torch.list<!torch.tensor> {
%1 = torch.tensor.literal(dense<-0.000000e+00> : tensor<4x1x1x512xf32>) : !torch.tensor<[4,1,1,512],f32>
%2 = torch.tensor.literal(dense<8.000000e+00> : tensor<f64>) : !torch.tensor<[],f64>
%none = torch.constant.none
%false = torch.constant.bool false
%int0 = torch.constant.int 0
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int4 = torch.constant.int 4
%int512 = torch.constant.int 512
%int768 = torch.constant.int 768
%float9.999990e-13 = torch.constant.float 9.9999999999999998E-13
%float9.000000e-01 = torch.constant.float 9.000000e-01
%int2048 = torch.constant.int 2048
%int12 = torch.constant.int 12
%int64 = torch.constant.int 64
%int2 = torch.constant.int 2
%int1 = torch.constant.int 1
%int3 = torch.constant.int 3
%int-1 = torch.constant.int -1
%int-2 = torch.constant.int -2
%int48 = torch.constant.int 48
%int3072 = torch.constant.int 3072
%int30522 = torch.constant.int 30522
%int-100 = torch.constant.int -100
%3 = torch.aten.slice.Tensor %arg206, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%4 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%5 = torch.aten.expand %3, %4, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%6 = torch.aten.slice.Tensor %arg205, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%7 = torch.aten.embedding %arg5, %arg207, %int0, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%8 = torch.aten.embedding %arg4, %5, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%9 = torch.aten.add.Tensor %7, %8, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%10 = torch.aten.embedding %arg3, %6, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%11 = torch.aten.add_.Tensor %9, %10, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%12 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0, %result1, %result2 = torch.aten.native_layer_norm %11, %12, %arg2, %arg1, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%13 = torch.aten.empty_like %result0, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%14 = torch.operator "aten.bernoulli_.float"(%13, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%15 = torch.aten.div_.Scalar %14, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%16 = torch.aten.mul.Tensor %result0, %15 : !torch.tensor, !torch.tensor -> !torch.tensor
%17 = torch.aten.t %arg13 : !torch.tensor -> !torch.tensor
%18 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%19 = torch.aten.view %16, %18 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%20 = torch.aten.mm %19, %17 : !torch.tensor, !torch.tensor -> !torch.tensor
%21 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%22 = torch.operator "aten._unsafe_view"(%20, %21) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%23 = torch.aten.add_.Tensor %22, %arg12, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%24 = torch.aten.t %arg11 : !torch.tensor -> !torch.tensor
%25 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%26 = torch.aten.view %16, %25 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%27 = torch.aten.mm %26, %24 : !torch.tensor, !torch.tensor -> !torch.tensor
%28 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%29 = torch.operator "aten._unsafe_view"(%27, %28) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%30 = torch.aten.add_.Tensor %29, %arg10, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%31 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%32 = torch.aten.view %30, %31 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%33 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%34 = torch.aten.permute %32, %33 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%35 = torch.aten.t %arg15 : !torch.tensor -> !torch.tensor
%36 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%37 = torch.aten.view %16, %36 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%38 = torch.aten.mm %37, %35 : !torch.tensor, !torch.tensor -> !torch.tensor
%39 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%40 = torch.operator "aten._unsafe_view"(%38, %39) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%41 = torch.aten.add_.Tensor %40, %arg14, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%42 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%43 = torch.aten.view %41, %42 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%44 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%45 = torch.aten.permute %43, %44 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%46 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%47 = torch.aten.view %23, %46 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%48 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%49 = torch.aten.permute %47, %48 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%50 = torch.aten.transpose.int %34, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%51 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%52 = torch.aten.expand %49, %51, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%53 = torch.operator "aten.clone"(%52, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%54 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%55 = torch.operator "aten._unsafe_view"(%53, %54) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%56 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%57 = torch.aten.expand %50, %56, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%58 = torch.operator "aten.clone"(%57, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%59 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%60 = torch.operator "aten._unsafe_view"(%58, %59) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%61 = torch.aten.bmm %55, %60 : !torch.tensor, !torch.tensor -> !torch.tensor
%62 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%63 = torch.operator "aten._unsafe_view"(%61, %62) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%64 = torch.aten.div.Tensor %63, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%65 = torch.aten.add.Tensor %64, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%66 = torch.aten._softmax %65, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%67 = torch.aten.empty_like %66, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%68 = torch.operator "aten.bernoulli_.float"(%67, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%69 = torch.aten.div_.Scalar %68, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%70 = torch.aten.mul.Tensor %66, %69 : !torch.tensor, !torch.tensor -> !torch.tensor
%71 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%72 = torch.aten.expand %70, %71, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%73 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%74 = torch.aten.view %72, %73 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%75 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%76 = torch.aten.expand %45, %75, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%77 = torch.operator "aten.clone"(%76, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%78 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%79 = torch.operator "aten._unsafe_view"(%77, %78) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%80 = torch.aten.bmm %74, %79 : !torch.tensor, !torch.tensor -> !torch.tensor
%81 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%82 = torch.operator "aten._unsafe_view"(%80, %81) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%83 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%84 = torch.aten.permute %82, %83 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%85 = torch.operator "aten.clone"(%84, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%86 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%87 = torch.aten.view %85, %86 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%88 = torch.aten.t %arg9 : !torch.tensor -> !torch.tensor
%89 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%90 = torch.aten.view %87, %89 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%91 = torch.aten.mm %90, %88 : !torch.tensor, !torch.tensor -> !torch.tensor
%92 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%93 = torch.operator "aten._unsafe_view"(%91, %92) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%94 = torch.aten.add_.Tensor %93, %arg8, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%95 = torch.aten.empty_like %94, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%96 = torch.operator "aten.bernoulli_.float"(%95, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%97 = torch.aten.div_.Scalar %96, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%98 = torch.aten.mul.Tensor %94, %97 : !torch.tensor, !torch.tensor -> !torch.tensor
%99 = torch.aten.add.Tensor %98, %16, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%100 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_0, %result1_1, %result2_2 = torch.aten.native_layer_norm %99, %100, %arg7, %arg6, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%101 = torch.aten.t %arg17 : !torch.tensor -> !torch.tensor
%102 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%103 = torch.aten.view %result0_0, %102 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%104 = torch.aten.mm %103, %101 : !torch.tensor, !torch.tensor -> !torch.tensor
%105 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%106 = torch.operator "aten._unsafe_view"(%104, %105) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%107 = torch.aten.add_.Tensor %106, %arg16, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%108 = torch.aten.gelu %107 : !torch.tensor -> !torch.tensor
%109 = torch.aten.t %arg21 : !torch.tensor -> !torch.tensor
%110 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%111 = torch.aten.view %108, %110 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%112 = torch.aten.mm %111, %109 : !torch.tensor, !torch.tensor -> !torch.tensor
%113 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%114 = torch.operator "aten._unsafe_view"(%112, %113) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%115 = torch.aten.add_.Tensor %114, %arg20, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%116 = torch.aten.empty_like %115, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%117 = torch.operator "aten.bernoulli_.float"(%116, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%118 = torch.aten.div_.Scalar %117, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%119 = torch.aten.mul.Tensor %115, %118 : !torch.tensor, !torch.tensor -> !torch.tensor
%120 = torch.aten.add.Tensor %119, %result0_0, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%121 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_3, %result1_4, %result2_5 = torch.aten.native_layer_norm %120, %121, %arg19, %arg18, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%122 = torch.aten.t %arg29 : !torch.tensor -> !torch.tensor
%123 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%124 = torch.aten.view %result0_3, %123 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%125 = torch.aten.mm %124, %122 : !torch.tensor, !torch.tensor -> !torch.tensor
%126 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%127 = torch.operator "aten._unsafe_view"(%125, %126) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%128 = torch.aten.add_.Tensor %127, %arg28, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%129 = torch.aten.t %arg27 : !torch.tensor -> !torch.tensor
%130 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%131 = torch.aten.view %result0_3, %130 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%132 = torch.aten.mm %131, %129 : !torch.tensor, !torch.tensor -> !torch.tensor
%133 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%134 = torch.operator "aten._unsafe_view"(%132, %133) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%135 = torch.aten.add_.Tensor %134, %arg26, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%136 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%137 = torch.aten.view %135, %136 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%138 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%139 = torch.aten.permute %137, %138 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%140 = torch.aten.t %arg31 : !torch.tensor -> !torch.tensor
%141 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%142 = torch.aten.view %result0_3, %141 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%143 = torch.aten.mm %142, %140 : !torch.tensor, !torch.tensor -> !torch.tensor
%144 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%145 = torch.operator "aten._unsafe_view"(%143, %144) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%146 = torch.aten.add_.Tensor %145, %arg30, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%147 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%148 = torch.aten.view %146, %147 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%149 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%150 = torch.aten.permute %148, %149 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%151 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%152 = torch.aten.view %128, %151 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%153 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%154 = torch.aten.permute %152, %153 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%155 = torch.aten.transpose.int %139, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%156 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%157 = torch.aten.expand %154, %156, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%158 = torch.operator "aten.clone"(%157, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%159 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%160 = torch.operator "aten._unsafe_view"(%158, %159) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%161 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%162 = torch.aten.expand %155, %161, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%163 = torch.operator "aten.clone"(%162, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%164 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%165 = torch.operator "aten._unsafe_view"(%163, %164) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%166 = torch.aten.bmm %160, %165 : !torch.tensor, !torch.tensor -> !torch.tensor
%167 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%168 = torch.operator "aten._unsafe_view"(%166, %167) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%169 = torch.aten.div.Tensor %168, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%170 = torch.aten.add.Tensor %169, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%171 = torch.aten._softmax %170, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%172 = torch.aten.empty_like %171, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%173 = torch.operator "aten.bernoulli_.float"(%172, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%174 = torch.aten.div_.Scalar %173, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%175 = torch.aten.mul.Tensor %171, %174 : !torch.tensor, !torch.tensor -> !torch.tensor
%176 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%177 = torch.aten.expand %175, %176, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%178 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%179 = torch.aten.view %177, %178 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%180 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%181 = torch.aten.expand %150, %180, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%182 = torch.operator "aten.clone"(%181, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%183 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%184 = torch.operator "aten._unsafe_view"(%182, %183) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%185 = torch.aten.bmm %179, %184 : !torch.tensor, !torch.tensor -> !torch.tensor
%186 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%187 = torch.operator "aten._unsafe_view"(%185, %186) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%188 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%189 = torch.aten.permute %187, %188 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%190 = torch.operator "aten.clone"(%189, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%191 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%192 = torch.aten.view %190, %191 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%193 = torch.aten.t %arg25 : !torch.tensor -> !torch.tensor
%194 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%195 = torch.aten.view %192, %194 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%196 = torch.aten.mm %195, %193 : !torch.tensor, !torch.tensor -> !torch.tensor
%197 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%198 = torch.operator "aten._unsafe_view"(%196, %197) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%199 = torch.aten.add_.Tensor %198, %arg24, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%200 = torch.aten.empty_like %199, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%201 = torch.operator "aten.bernoulli_.float"(%200, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%202 = torch.aten.div_.Scalar %201, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%203 = torch.aten.mul.Tensor %199, %202 : !torch.tensor, !torch.tensor -> !torch.tensor
%204 = torch.aten.add.Tensor %203, %result0_3, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%205 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_6, %result1_7, %result2_8 = torch.aten.native_layer_norm %204, %205, %arg23, %arg22, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%206 = torch.aten.t %arg33 : !torch.tensor -> !torch.tensor
%207 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%208 = torch.aten.view %result0_6, %207 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%209 = torch.aten.mm %208, %206 : !torch.tensor, !torch.tensor -> !torch.tensor
%210 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%211 = torch.operator "aten._unsafe_view"(%209, %210) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%212 = torch.aten.add_.Tensor %211, %arg32, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%213 = torch.aten.gelu %212 : !torch.tensor -> !torch.tensor
%214 = torch.aten.t %arg37 : !torch.tensor -> !torch.tensor
%215 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%216 = torch.aten.view %213, %215 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%217 = torch.aten.mm %216, %214 : !torch.tensor, !torch.tensor -> !torch.tensor
%218 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%219 = torch.operator "aten._unsafe_view"(%217, %218) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%220 = torch.aten.add_.Tensor %219, %arg36, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%221 = torch.aten.empty_like %220, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%222 = torch.operator "aten.bernoulli_.float"(%221, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%223 = torch.aten.div_.Scalar %222, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%224 = torch.aten.mul.Tensor %220, %223 : !torch.tensor, !torch.tensor -> !torch.tensor
%225 = torch.aten.add.Tensor %224, %result0_6, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%226 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_9, %result1_10, %result2_11 = torch.aten.native_layer_norm %225, %226, %arg35, %arg34, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%227 = torch.aten.t %arg77 : !torch.tensor -> !torch.tensor
%228 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%229 = torch.aten.view %result0_9, %228 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%230 = torch.aten.mm %229, %227 : !torch.tensor, !torch.tensor -> !torch.tensor
%231 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%232 = torch.operator "aten._unsafe_view"(%230, %231) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%233 = torch.aten.add_.Tensor %232, %arg76, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%234 = torch.aten.t %arg75 : !torch.tensor -> !torch.tensor
%235 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%236 = torch.aten.view %result0_9, %235 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%237 = torch.aten.mm %236, %234 : !torch.tensor, !torch.tensor -> !torch.tensor
%238 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%239 = torch.operator "aten._unsafe_view"(%237, %238) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%240 = torch.aten.add_.Tensor %239, %arg74, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%241 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%242 = torch.aten.view %240, %241 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%243 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%244 = torch.aten.permute %242, %243 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%245 = torch.aten.t %arg79 : !torch.tensor -> !torch.tensor
%246 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%247 = torch.aten.view %result0_9, %246 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%248 = torch.aten.mm %247, %245 : !torch.tensor, !torch.tensor -> !torch.tensor
%249 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%250 = torch.operator "aten._unsafe_view"(%248, %249) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%251 = torch.aten.add_.Tensor %250, %arg78, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%252 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%253 = torch.aten.view %251, %252 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%254 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%255 = torch.aten.permute %253, %254 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%256 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%257 = torch.aten.view %233, %256 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%258 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%259 = torch.aten.permute %257, %258 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%260 = torch.aten.transpose.int %244, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%261 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%262 = torch.aten.expand %259, %261, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%263 = torch.operator "aten.clone"(%262, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%264 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%265 = torch.operator "aten._unsafe_view"(%263, %264) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%266 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%267 = torch.aten.expand %260, %266, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%268 = torch.operator "aten.clone"(%267, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%269 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%270 = torch.operator "aten._unsafe_view"(%268, %269) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%271 = torch.aten.bmm %265, %270 : !torch.tensor, !torch.tensor -> !torch.tensor
%272 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%273 = torch.operator "aten._unsafe_view"(%271, %272) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%274 = torch.aten.div.Tensor %273, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%275 = torch.aten.add.Tensor %274, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%276 = torch.aten._softmax %275, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%277 = torch.aten.empty_like %276, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%278 = torch.operator "aten.bernoulli_.float"(%277, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%279 = torch.aten.div_.Scalar %278, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%280 = torch.aten.mul.Tensor %276, %279 : !torch.tensor, !torch.tensor -> !torch.tensor
%281 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%282 = torch.aten.expand %280, %281, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%283 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%284 = torch.aten.view %282, %283 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%285 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%286 = torch.aten.expand %255, %285, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%287 = torch.operator "aten.clone"(%286, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%288 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%289 = torch.operator "aten._unsafe_view"(%287, %288) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%290 = torch.aten.bmm %284, %289 : !torch.tensor, !torch.tensor -> !torch.tensor
%291 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%292 = torch.operator "aten._unsafe_view"(%290, %291) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%293 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%294 = torch.aten.permute %292, %293 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%295 = torch.operator "aten.clone"(%294, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%296 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%297 = torch.aten.view %295, %296 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%298 = torch.aten.t %arg73 : !torch.tensor -> !torch.tensor
%299 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%300 = torch.aten.view %297, %299 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%301 = torch.aten.mm %300, %298 : !torch.tensor, !torch.tensor -> !torch.tensor
%302 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%303 = torch.operator "aten._unsafe_view"(%301, %302) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%304 = torch.aten.add_.Tensor %303, %arg72, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%305 = torch.aten.empty_like %304, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%306 = torch.operator "aten.bernoulli_.float"(%305, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%307 = torch.aten.div_.Scalar %306, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%308 = torch.aten.mul.Tensor %304, %307 : !torch.tensor, !torch.tensor -> !torch.tensor
%309 = torch.aten.add.Tensor %308, %result0_9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%310 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_12, %result1_13, %result2_14 = torch.aten.native_layer_norm %309, %310, %arg71, %arg70, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%311 = torch.aten.t %arg81 : !torch.tensor -> !torch.tensor
%312 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%313 = torch.aten.view %result0_12, %312 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%314 = torch.aten.mm %313, %311 : !torch.tensor, !torch.tensor -> !torch.tensor
%315 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%316 = torch.operator "aten._unsafe_view"(%314, %315) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%317 = torch.aten.add_.Tensor %316, %arg80, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%318 = torch.aten.gelu %317 : !torch.tensor -> !torch.tensor
%319 = torch.aten.t %arg85 : !torch.tensor -> !torch.tensor
%320 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%321 = torch.aten.view %318, %320 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%322 = torch.aten.mm %321, %319 : !torch.tensor, !torch.tensor -> !torch.tensor
%323 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%324 = torch.operator "aten._unsafe_view"(%322, %323) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%325 = torch.aten.add_.Tensor %324, %arg84, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%326 = torch.aten.empty_like %325, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%327 = torch.operator "aten.bernoulli_.float"(%326, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%328 = torch.aten.div_.Scalar %327, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%329 = torch.aten.mul.Tensor %325, %328 : !torch.tensor, !torch.tensor -> !torch.tensor
%330 = torch.aten.add.Tensor %329, %result0_12, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%331 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_15, %result1_16, %result2_17 = torch.aten.native_layer_norm %330, %331, %arg83, %arg82, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%332 = torch.aten.t %arg93 : !torch.tensor -> !torch.tensor
%333 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%334 = torch.aten.view %result0_15, %333 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%335 = torch.aten.mm %334, %332 : !torch.tensor, !torch.tensor -> !torch.tensor
%336 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%337 = torch.operator "aten._unsafe_view"(%335, %336) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%338 = torch.aten.add_.Tensor %337, %arg92, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%339 = torch.aten.t %arg91 : !torch.tensor -> !torch.tensor
%340 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%341 = torch.aten.view %result0_15, %340 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%342 = torch.aten.mm %341, %339 : !torch.tensor, !torch.tensor -> !torch.tensor
%343 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%344 = torch.operator "aten._unsafe_view"(%342, %343) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%345 = torch.aten.add_.Tensor %344, %arg90, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%346 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%347 = torch.aten.view %345, %346 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%348 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%349 = torch.aten.permute %347, %348 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%350 = torch.aten.t %arg95 : !torch.tensor -> !torch.tensor
%351 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%352 = torch.aten.view %result0_15, %351 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%353 = torch.aten.mm %352, %350 : !torch.tensor, !torch.tensor -> !torch.tensor
%354 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%355 = torch.operator "aten._unsafe_view"(%353, %354) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%356 = torch.aten.add_.Tensor %355, %arg94, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%357 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%358 = torch.aten.view %356, %357 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%359 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%360 = torch.aten.permute %358, %359 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%361 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%362 = torch.aten.view %338, %361 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%363 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%364 = torch.aten.permute %362, %363 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%365 = torch.aten.transpose.int %349, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%366 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%367 = torch.aten.expand %364, %366, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%368 = torch.operator "aten.clone"(%367, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%369 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%370 = torch.operator "aten._unsafe_view"(%368, %369) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%371 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%372 = torch.aten.expand %365, %371, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%373 = torch.operator "aten.clone"(%372, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%374 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%375 = torch.operator "aten._unsafe_view"(%373, %374) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%376 = torch.aten.bmm %370, %375 : !torch.tensor, !torch.tensor -> !torch.tensor
%377 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%378 = torch.operator "aten._unsafe_view"(%376, %377) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%379 = torch.aten.div.Tensor %378, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%380 = torch.aten.add.Tensor %379, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%381 = torch.aten._softmax %380, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%382 = torch.aten.empty_like %381, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%383 = torch.operator "aten.bernoulli_.float"(%382, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%384 = torch.aten.div_.Scalar %383, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%385 = torch.aten.mul.Tensor %381, %384 : !torch.tensor, !torch.tensor -> !torch.tensor
%386 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%387 = torch.aten.expand %385, %386, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%388 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%389 = torch.aten.view %387, %388 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%390 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%391 = torch.aten.expand %360, %390, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%392 = torch.operator "aten.clone"(%391, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%393 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%394 = torch.operator "aten._unsafe_view"(%392, %393) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%395 = torch.aten.bmm %389, %394 : !torch.tensor, !torch.tensor -> !torch.tensor
%396 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%397 = torch.operator "aten._unsafe_view"(%395, %396) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%398 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%399 = torch.aten.permute %397, %398 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%400 = torch.operator "aten.clone"(%399, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%401 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%402 = torch.aten.view %400, %401 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%403 = torch.aten.t %arg89 : !torch.tensor -> !torch.tensor
%404 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%405 = torch.aten.view %402, %404 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%406 = torch.aten.mm %405, %403 : !torch.tensor, !torch.tensor -> !torch.tensor
%407 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%408 = torch.operator "aten._unsafe_view"(%406, %407) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%409 = torch.aten.add_.Tensor %408, %arg88, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%410 = torch.aten.empty_like %409, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%411 = torch.operator "aten.bernoulli_.float"(%410, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%412 = torch.aten.div_.Scalar %411, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%413 = torch.aten.mul.Tensor %409, %412 : !torch.tensor, !torch.tensor -> !torch.tensor
%414 = torch.aten.add.Tensor %413, %result0_15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%415 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_18, %result1_19, %result2_20 = torch.aten.native_layer_norm %414, %415, %arg87, %arg86, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%416 = torch.aten.t %arg97 : !torch.tensor -> !torch.tensor
%417 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%418 = torch.aten.view %result0_18, %417 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%419 = torch.aten.mm %418, %416 : !torch.tensor, !torch.tensor -> !torch.tensor
%420 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%421 = torch.operator "aten._unsafe_view"(%419, %420) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%422 = torch.aten.add_.Tensor %421, %arg96, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%423 = torch.aten.gelu %422 : !torch.tensor -> !torch.tensor
%424 = torch.aten.t %arg101 : !torch.tensor -> !torch.tensor
%425 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%426 = torch.aten.view %423, %425 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%427 = torch.aten.mm %426, %424 : !torch.tensor, !torch.tensor -> !torch.tensor
%428 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%429 = torch.operator "aten._unsafe_view"(%427, %428) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%430 = torch.aten.add_.Tensor %429, %arg100, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%431 = torch.aten.empty_like %430, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%432 = torch.operator "aten.bernoulli_.float"(%431, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%433 = torch.aten.div_.Scalar %432, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%434 = torch.aten.mul.Tensor %430, %433 : !torch.tensor, !torch.tensor -> !torch.tensor
%435 = torch.aten.add.Tensor %434, %result0_18, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%436 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_21, %result1_22, %result2_23 = torch.aten.native_layer_norm %435, %436, %arg99, %arg98, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%437 = torch.aten.t %arg109 : !torch.tensor -> !torch.tensor
%438 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%439 = torch.aten.view %result0_21, %438 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%440 = torch.aten.mm %439, %437 : !torch.tensor, !torch.tensor -> !torch.tensor
%441 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%442 = torch.operator "aten._unsafe_view"(%440, %441) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%443 = torch.aten.add_.Tensor %442, %arg108, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%444 = torch.aten.t %arg107 : !torch.tensor -> !torch.tensor
%445 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%446 = torch.aten.view %result0_21, %445 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%447 = torch.aten.mm %446, %444 : !torch.tensor, !torch.tensor -> !torch.tensor
%448 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%449 = torch.operator "aten._unsafe_view"(%447, %448) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%450 = torch.aten.add_.Tensor %449, %arg106, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%451 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%452 = torch.aten.view %450, %451 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%453 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%454 = torch.aten.permute %452, %453 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%455 = torch.aten.t %arg111 : !torch.tensor -> !torch.tensor
%456 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%457 = torch.aten.view %result0_21, %456 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%458 = torch.aten.mm %457, %455 : !torch.tensor, !torch.tensor -> !torch.tensor
%459 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%460 = torch.operator "aten._unsafe_view"(%458, %459) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%461 = torch.aten.add_.Tensor %460, %arg110, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%462 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%463 = torch.aten.view %461, %462 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%464 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%465 = torch.aten.permute %463, %464 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%466 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%467 = torch.aten.view %443, %466 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%468 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%469 = torch.aten.permute %467, %468 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%470 = torch.aten.transpose.int %454, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%471 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%472 = torch.aten.expand %469, %471, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%473 = torch.operator "aten.clone"(%472, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%474 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%475 = torch.operator "aten._unsafe_view"(%473, %474) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%476 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%477 = torch.aten.expand %470, %476, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%478 = torch.operator "aten.clone"(%477, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%479 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%480 = torch.operator "aten._unsafe_view"(%478, %479) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%481 = torch.aten.bmm %475, %480 : !torch.tensor, !torch.tensor -> !torch.tensor
%482 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%483 = torch.operator "aten._unsafe_view"(%481, %482) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%484 = torch.aten.div.Tensor %483, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%485 = torch.aten.add.Tensor %484, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%486 = torch.aten._softmax %485, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%487 = torch.aten.empty_like %486, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%488 = torch.operator "aten.bernoulli_.float"(%487, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%489 = torch.aten.div_.Scalar %488, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%490 = torch.aten.mul.Tensor %486, %489 : !torch.tensor, !torch.tensor -> !torch.tensor
%491 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%492 = torch.aten.expand %490, %491, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%493 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%494 = torch.aten.view %492, %493 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%495 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%496 = torch.aten.expand %465, %495, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%497 = torch.operator "aten.clone"(%496, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%498 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%499 = torch.operator "aten._unsafe_view"(%497, %498) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%500 = torch.aten.bmm %494, %499 : !torch.tensor, !torch.tensor -> !torch.tensor
%501 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%502 = torch.operator "aten._unsafe_view"(%500, %501) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%503 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%504 = torch.aten.permute %502, %503 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%505 = torch.operator "aten.clone"(%504, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%506 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%507 = torch.aten.view %505, %506 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%508 = torch.aten.t %arg105 : !torch.tensor -> !torch.tensor
%509 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%510 = torch.aten.view %507, %509 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%511 = torch.aten.mm %510, %508 : !torch.tensor, !torch.tensor -> !torch.tensor
%512 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%513 = torch.operator "aten._unsafe_view"(%511, %512) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%514 = torch.aten.add_.Tensor %513, %arg104, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%515 = torch.aten.empty_like %514, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%516 = torch.operator "aten.bernoulli_.float"(%515, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%517 = torch.aten.div_.Scalar %516, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%518 = torch.aten.mul.Tensor %514, %517 : !torch.tensor, !torch.tensor -> !torch.tensor
%519 = torch.aten.add.Tensor %518, %result0_21, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%520 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %519, %520, %arg103, %arg102, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%521 = torch.aten.t %arg113 : !torch.tensor -> !torch.tensor
%522 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%523 = torch.aten.view %result0_24, %522 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%524 = torch.aten.mm %523, %521 : !torch.tensor, !torch.tensor -> !torch.tensor
%525 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%526 = torch.operator "aten._unsafe_view"(%524, %525) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%527 = torch.aten.add_.Tensor %526, %arg112, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%528 = torch.aten.gelu %527 : !torch.tensor -> !torch.tensor
%529 = torch.aten.t %arg117 : !torch.tensor -> !torch.tensor
%530 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%531 = torch.aten.view %528, %530 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%532 = torch.aten.mm %531, %529 : !torch.tensor, !torch.tensor -> !torch.tensor
%533 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%534 = torch.operator "aten._unsafe_view"(%532, %533) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%535 = torch.aten.add_.Tensor %534, %arg116, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%536 = torch.aten.empty_like %535, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%537 = torch.operator "aten.bernoulli_.float"(%536, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%538 = torch.aten.div_.Scalar %537, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%539 = torch.aten.mul.Tensor %535, %538 : !torch.tensor, !torch.tensor -> !torch.tensor
%540 = torch.aten.add.Tensor %539, %result0_24, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%541 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_27, %result1_28, %result2_29 = torch.aten.native_layer_norm %540, %541, %arg115, %arg114, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%542 = torch.aten.t %arg125 : !torch.tensor -> !torch.tensor
%543 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%544 = torch.aten.view %result0_27, %543 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%545 = torch.aten.mm %544, %542 : !torch.tensor, !torch.tensor -> !torch.tensor
%546 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%547 = torch.operator "aten._unsafe_view"(%545, %546) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%548 = torch.aten.add_.Tensor %547, %arg124, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%549 = torch.aten.t %arg123 : !torch.tensor -> !torch.tensor
%550 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%551 = torch.aten.view %result0_27, %550 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%552 = torch.aten.mm %551, %549 : !torch.tensor, !torch.tensor -> !torch.tensor
%553 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%554 = torch.operator "aten._unsafe_view"(%552, %553) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%555 = torch.aten.add_.Tensor %554, %arg122, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%556 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%557 = torch.aten.view %555, %556 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%558 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%559 = torch.aten.permute %557, %558 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%560 = torch.aten.t %arg127 : !torch.tensor -> !torch.tensor
%561 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%562 = torch.aten.view %result0_27, %561 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%563 = torch.aten.mm %562, %560 : !torch.tensor, !torch.tensor -> !torch.tensor
%564 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%565 = torch.operator "aten._unsafe_view"(%563, %564) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%566 = torch.aten.add_.Tensor %565, %arg126, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%567 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%568 = torch.aten.view %566, %567 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%569 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%570 = torch.aten.permute %568, %569 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%571 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%572 = torch.aten.view %548, %571 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%573 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%574 = torch.aten.permute %572, %573 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%575 = torch.aten.transpose.int %559, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%576 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%577 = torch.aten.expand %574, %576, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%578 = torch.operator "aten.clone"(%577, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%579 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%580 = torch.operator "aten._unsafe_view"(%578, %579) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%581 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%582 = torch.aten.expand %575, %581, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%583 = torch.operator "aten.clone"(%582, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%584 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%585 = torch.operator "aten._unsafe_view"(%583, %584) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%586 = torch.aten.bmm %580, %585 : !torch.tensor, !torch.tensor -> !torch.tensor
%587 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%588 = torch.operator "aten._unsafe_view"(%586, %587) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%589 = torch.aten.div.Tensor %588, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%590 = torch.aten.add.Tensor %589, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%591 = torch.aten._softmax %590, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%592 = torch.aten.empty_like %591, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%593 = torch.operator "aten.bernoulli_.float"(%592, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%594 = torch.aten.div_.Scalar %593, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%595 = torch.aten.mul.Tensor %591, %594 : !torch.tensor, !torch.tensor -> !torch.tensor
%596 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%597 = torch.aten.expand %595, %596, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%598 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%599 = torch.aten.view %597, %598 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%600 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%601 = torch.aten.expand %570, %600, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%602 = torch.operator "aten.clone"(%601, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%603 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%604 = torch.operator "aten._unsafe_view"(%602, %603) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%605 = torch.aten.bmm %599, %604 : !torch.tensor, !torch.tensor -> !torch.tensor
%606 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%607 = torch.operator "aten._unsafe_view"(%605, %606) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%608 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%609 = torch.aten.permute %607, %608 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%610 = torch.operator "aten.clone"(%609, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%611 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%612 = torch.aten.view %610, %611 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%613 = torch.aten.t %arg121 : !torch.tensor -> !torch.tensor
%614 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%615 = torch.aten.view %612, %614 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%616 = torch.aten.mm %615, %613 : !torch.tensor, !torch.tensor -> !torch.tensor
%617 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%618 = torch.operator "aten._unsafe_view"(%616, %617) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%619 = torch.aten.add_.Tensor %618, %arg120, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%620 = torch.aten.empty_like %619, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%621 = torch.operator "aten.bernoulli_.float"(%620, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%622 = torch.aten.div_.Scalar %621, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%623 = torch.aten.mul.Tensor %619, %622 : !torch.tensor, !torch.tensor -> !torch.tensor
%624 = torch.aten.add.Tensor %623, %result0_27, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%625 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_30, %result1_31, %result2_32 = torch.aten.native_layer_norm %624, %625, %arg119, %arg118, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%626 = torch.aten.t %arg129 : !torch.tensor -> !torch.tensor
%627 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%628 = torch.aten.view %result0_30, %627 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%629 = torch.aten.mm %628, %626 : !torch.tensor, !torch.tensor -> !torch.tensor
%630 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%631 = torch.operator "aten._unsafe_view"(%629, %630) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%632 = torch.aten.add_.Tensor %631, %arg128, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%633 = torch.aten.gelu %632 : !torch.tensor -> !torch.tensor
%634 = torch.aten.t %arg133 : !torch.tensor -> !torch.tensor
%635 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%636 = torch.aten.view %633, %635 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%637 = torch.aten.mm %636, %634 : !torch.tensor, !torch.tensor -> !torch.tensor
%638 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%639 = torch.operator "aten._unsafe_view"(%637, %638) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%640 = torch.aten.add_.Tensor %639, %arg132, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%641 = torch.aten.empty_like %640, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%642 = torch.operator "aten.bernoulli_.float"(%641, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%643 = torch.aten.div_.Scalar %642, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%644 = torch.aten.mul.Tensor %640, %643 : !torch.tensor, !torch.tensor -> !torch.tensor
%645 = torch.aten.add.Tensor %644, %result0_30, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%646 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_33, %result1_34, %result2_35 = torch.aten.native_layer_norm %645, %646, %arg131, %arg130, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%647 = torch.aten.t %arg141 : !torch.tensor -> !torch.tensor
%648 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%649 = torch.aten.view %result0_33, %648 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%650 = torch.aten.mm %649, %647 : !torch.tensor, !torch.tensor -> !torch.tensor
%651 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%652 = torch.operator "aten._unsafe_view"(%650, %651) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%653 = torch.aten.add_.Tensor %652, %arg140, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%654 = torch.aten.t %arg139 : !torch.tensor -> !torch.tensor
%655 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%656 = torch.aten.view %result0_33, %655 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%657 = torch.aten.mm %656, %654 : !torch.tensor, !torch.tensor -> !torch.tensor
%658 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%659 = torch.operator "aten._unsafe_view"(%657, %658) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%660 = torch.aten.add_.Tensor %659, %arg138, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%661 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%662 = torch.aten.view %660, %661 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%663 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%664 = torch.aten.permute %662, %663 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%665 = torch.aten.t %arg143 : !torch.tensor -> !torch.tensor
%666 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%667 = torch.aten.view %result0_33, %666 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%668 = torch.aten.mm %667, %665 : !torch.tensor, !torch.tensor -> !torch.tensor
%669 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%670 = torch.operator "aten._unsafe_view"(%668, %669) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%671 = torch.aten.add_.Tensor %670, %arg142, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%672 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%673 = torch.aten.view %671, %672 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%674 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%675 = torch.aten.permute %673, %674 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%676 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%677 = torch.aten.view %653, %676 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%678 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%679 = torch.aten.permute %677, %678 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%680 = torch.aten.transpose.int %664, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%681 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%682 = torch.aten.expand %679, %681, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%683 = torch.operator "aten.clone"(%682, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%684 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%685 = torch.operator "aten._unsafe_view"(%683, %684) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%686 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%687 = torch.aten.expand %680, %686, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%688 = torch.operator "aten.clone"(%687, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%689 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%690 = torch.operator "aten._unsafe_view"(%688, %689) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%691 = torch.aten.bmm %685, %690 : !torch.tensor, !torch.tensor -> !torch.tensor
%692 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%693 = torch.operator "aten._unsafe_view"(%691, %692) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%694 = torch.aten.div.Tensor %693, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%695 = torch.aten.add.Tensor %694, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%696 = torch.aten._softmax %695, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%697 = torch.aten.empty_like %696, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%698 = torch.operator "aten.bernoulli_.float"(%697, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%699 = torch.aten.div_.Scalar %698, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%700 = torch.aten.mul.Tensor %696, %699 : !torch.tensor, !torch.tensor -> !torch.tensor
%701 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%702 = torch.aten.expand %700, %701, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%703 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%704 = torch.aten.view %702, %703 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%705 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%706 = torch.aten.expand %675, %705, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%707 = torch.operator "aten.clone"(%706, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%708 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%709 = torch.operator "aten._unsafe_view"(%707, %708) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%710 = torch.aten.bmm %704, %709 : !torch.tensor, !torch.tensor -> !torch.tensor
%711 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%712 = torch.operator "aten._unsafe_view"(%710, %711) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%713 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%714 = torch.aten.permute %712, %713 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%715 = torch.operator "aten.clone"(%714, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%716 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%717 = torch.aten.view %715, %716 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%718 = torch.aten.t %arg137 : !torch.tensor -> !torch.tensor
%719 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%720 = torch.aten.view %717, %719 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%721 = torch.aten.mm %720, %718 : !torch.tensor, !torch.tensor -> !torch.tensor
%722 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%723 = torch.operator "aten._unsafe_view"(%721, %722) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%724 = torch.aten.add_.Tensor %723, %arg136, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%725 = torch.aten.empty_like %724, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%726 = torch.operator "aten.bernoulli_.float"(%725, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%727 = torch.aten.div_.Scalar %726, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%728 = torch.aten.mul.Tensor %724, %727 : !torch.tensor, !torch.tensor -> !torch.tensor
%729 = torch.aten.add.Tensor %728, %result0_33, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%730 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_36, %result1_37, %result2_38 = torch.aten.native_layer_norm %729, %730, %arg135, %arg134, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%731 = torch.aten.t %arg145 : !torch.tensor -> !torch.tensor
%732 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%733 = torch.aten.view %result0_36, %732 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%734 = torch.aten.mm %733, %731 : !torch.tensor, !torch.tensor -> !torch.tensor
%735 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%736 = torch.operator "aten._unsafe_view"(%734, %735) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%737 = torch.aten.add_.Tensor %736, %arg144, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%738 = torch.aten.gelu %737 : !torch.tensor -> !torch.tensor
%739 = torch.aten.t %arg149 : !torch.tensor -> !torch.tensor
%740 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%741 = torch.aten.view %738, %740 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%742 = torch.aten.mm %741, %739 : !torch.tensor, !torch.tensor -> !torch.tensor
%743 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%744 = torch.operator "aten._unsafe_view"(%742, %743) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%745 = torch.aten.add_.Tensor %744, %arg148, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%746 = torch.aten.empty_like %745, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%747 = torch.operator "aten.bernoulli_.float"(%746, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%748 = torch.aten.div_.Scalar %747, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%749 = torch.aten.mul.Tensor %745, %748 : !torch.tensor, !torch.tensor -> !torch.tensor
%750 = torch.aten.add.Tensor %749, %result0_36, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%751 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_39, %result1_40, %result2_41 = torch.aten.native_layer_norm %750, %751, %arg147, %arg146, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%752 = torch.aten.t %arg157 : !torch.tensor -> !torch.tensor
%753 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%754 = torch.aten.view %result0_39, %753 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%755 = torch.aten.mm %754, %752 : !torch.tensor, !torch.tensor -> !torch.tensor
%756 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%757 = torch.operator "aten._unsafe_view"(%755, %756) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%758 = torch.aten.add_.Tensor %757, %arg156, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%759 = torch.aten.t %arg155 : !torch.tensor -> !torch.tensor
%760 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%761 = torch.aten.view %result0_39, %760 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%762 = torch.aten.mm %761, %759 : !torch.tensor, !torch.tensor -> !torch.tensor
%763 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%764 = torch.operator "aten._unsafe_view"(%762, %763) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%765 = torch.aten.add_.Tensor %764, %arg154, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%766 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%767 = torch.aten.view %765, %766 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%768 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%769 = torch.aten.permute %767, %768 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%770 = torch.aten.t %arg159 : !torch.tensor -> !torch.tensor
%771 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%772 = torch.aten.view %result0_39, %771 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%773 = torch.aten.mm %772, %770 : !torch.tensor, !torch.tensor -> !torch.tensor
%774 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%775 = torch.operator "aten._unsafe_view"(%773, %774) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%776 = torch.aten.add_.Tensor %775, %arg158, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%777 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%778 = torch.aten.view %776, %777 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%779 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%780 = torch.aten.permute %778, %779 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%781 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%782 = torch.aten.view %758, %781 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%783 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%784 = torch.aten.permute %782, %783 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%785 = torch.aten.transpose.int %769, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%786 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%787 = torch.aten.expand %784, %786, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%788 = torch.operator "aten.clone"(%787, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%789 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%790 = torch.operator "aten._unsafe_view"(%788, %789) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%791 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%792 = torch.aten.expand %785, %791, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%793 = torch.operator "aten.clone"(%792, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%794 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%795 = torch.operator "aten._unsafe_view"(%793, %794) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%796 = torch.aten.bmm %790, %795 : !torch.tensor, !torch.tensor -> !torch.tensor
%797 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%798 = torch.operator "aten._unsafe_view"(%796, %797) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%799 = torch.aten.div.Tensor %798, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%800 = torch.aten.add.Tensor %799, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%801 = torch.aten._softmax %800, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%802 = torch.aten.empty_like %801, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%803 = torch.operator "aten.bernoulli_.float"(%802, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%804 = torch.aten.div_.Scalar %803, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%805 = torch.aten.mul.Tensor %801, %804 : !torch.tensor, !torch.tensor -> !torch.tensor
%806 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%807 = torch.aten.expand %805, %806, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%808 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%809 = torch.aten.view %807, %808 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%810 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%811 = torch.aten.expand %780, %810, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%812 = torch.operator "aten.clone"(%811, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%813 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%814 = torch.operator "aten._unsafe_view"(%812, %813) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%815 = torch.aten.bmm %809, %814 : !torch.tensor, !torch.tensor -> !torch.tensor
%816 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%817 = torch.operator "aten._unsafe_view"(%815, %816) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%818 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%819 = torch.aten.permute %817, %818 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%820 = torch.operator "aten.clone"(%819, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%821 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%822 = torch.aten.view %820, %821 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%823 = torch.aten.t %arg153 : !torch.tensor -> !torch.tensor
%824 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%825 = torch.aten.view %822, %824 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%826 = torch.aten.mm %825, %823 : !torch.tensor, !torch.tensor -> !torch.tensor
%827 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%828 = torch.operator "aten._unsafe_view"(%826, %827) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%829 = torch.aten.add_.Tensor %828, %arg152, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%830 = torch.aten.empty_like %829, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%831 = torch.operator "aten.bernoulli_.float"(%830, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%832 = torch.aten.div_.Scalar %831, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%833 = torch.aten.mul.Tensor %829, %832 : !torch.tensor, !torch.tensor -> !torch.tensor
%834 = torch.aten.add.Tensor %833, %result0_39, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%835 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_42, %result1_43, %result2_44 = torch.aten.native_layer_norm %834, %835, %arg151, %arg150, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%836 = torch.aten.t %arg161 : !torch.tensor -> !torch.tensor
%837 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%838 = torch.aten.view %result0_42, %837 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%839 = torch.aten.mm %838, %836 : !torch.tensor, !torch.tensor -> !torch.tensor
%840 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%841 = torch.operator "aten._unsafe_view"(%839, %840) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%842 = torch.aten.add_.Tensor %841, %arg160, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%843 = torch.aten.gelu %842 : !torch.tensor -> !torch.tensor
%844 = torch.aten.t %arg165 : !torch.tensor -> !torch.tensor
%845 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%846 = torch.aten.view %843, %845 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%847 = torch.aten.mm %846, %844 : !torch.tensor, !torch.tensor -> !torch.tensor
%848 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%849 = torch.operator "aten._unsafe_view"(%847, %848) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%850 = torch.aten.add_.Tensor %849, %arg164, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%851 = torch.aten.empty_like %850, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%852 = torch.operator "aten.bernoulli_.float"(%851, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%853 = torch.aten.div_.Scalar %852, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%854 = torch.aten.mul.Tensor %850, %853 : !torch.tensor, !torch.tensor -> !torch.tensor
%855 = torch.aten.add.Tensor %854, %result0_42, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%856 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_45, %result1_46, %result2_47 = torch.aten.native_layer_norm %855, %856, %arg163, %arg162, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%857 = torch.aten.t %arg173 : !torch.tensor -> !torch.tensor
%858 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%859 = torch.aten.view %result0_45, %858 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%860 = torch.aten.mm %859, %857 : !torch.tensor, !torch.tensor -> !torch.tensor
%861 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%862 = torch.operator "aten._unsafe_view"(%860, %861) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%863 = torch.aten.add_.Tensor %862, %arg172, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%864 = torch.aten.t %arg171 : !torch.tensor -> !torch.tensor
%865 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%866 = torch.aten.view %result0_45, %865 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%867 = torch.aten.mm %866, %864 : !torch.tensor, !torch.tensor -> !torch.tensor
%868 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%869 = torch.operator "aten._unsafe_view"(%867, %868) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%870 = torch.aten.add_.Tensor %869, %arg170, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%871 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%872 = torch.aten.view %870, %871 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%873 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%874 = torch.aten.permute %872, %873 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%875 = torch.aten.t %arg175 : !torch.tensor -> !torch.tensor
%876 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%877 = torch.aten.view %result0_45, %876 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%878 = torch.aten.mm %877, %875 : !torch.tensor, !torch.tensor -> !torch.tensor
%879 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%880 = torch.operator "aten._unsafe_view"(%878, %879) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%881 = torch.aten.add_.Tensor %880, %arg174, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%882 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%883 = torch.aten.view %881, %882 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%884 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%885 = torch.aten.permute %883, %884 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%886 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%887 = torch.aten.view %863, %886 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%888 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%889 = torch.aten.permute %887, %888 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%890 = torch.aten.transpose.int %874, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%891 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%892 = torch.aten.expand %889, %891, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%893 = torch.operator "aten.clone"(%892, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%894 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%895 = torch.operator "aten._unsafe_view"(%893, %894) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%896 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%897 = torch.aten.expand %890, %896, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%898 = torch.operator "aten.clone"(%897, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%899 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%900 = torch.operator "aten._unsafe_view"(%898, %899) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%901 = torch.aten.bmm %895, %900 : !torch.tensor, !torch.tensor -> !torch.tensor
%902 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%903 = torch.operator "aten._unsafe_view"(%901, %902) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%904 = torch.aten.div.Tensor %903, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%905 = torch.aten.add.Tensor %904, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%906 = torch.aten._softmax %905, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%907 = torch.aten.empty_like %906, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%908 = torch.operator "aten.bernoulli_.float"(%907, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%909 = torch.aten.div_.Scalar %908, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%910 = torch.aten.mul.Tensor %906, %909 : !torch.tensor, !torch.tensor -> !torch.tensor
%911 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%912 = torch.aten.expand %910, %911, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%913 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%914 = torch.aten.view %912, %913 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%915 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%916 = torch.aten.expand %885, %915, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%917 = torch.operator "aten.clone"(%916, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%918 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%919 = torch.operator "aten._unsafe_view"(%917, %918) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%920 = torch.aten.bmm %914, %919 : !torch.tensor, !torch.tensor -> !torch.tensor
%921 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%922 = torch.operator "aten._unsafe_view"(%920, %921) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%923 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%924 = torch.aten.permute %922, %923 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%925 = torch.operator "aten.clone"(%924, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%926 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%927 = torch.aten.view %925, %926 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%928 = torch.aten.t %arg169 : !torch.tensor -> !torch.tensor
%929 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%930 = torch.aten.view %927, %929 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%931 = torch.aten.mm %930, %928 : !torch.tensor, !torch.tensor -> !torch.tensor
%932 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%933 = torch.operator "aten._unsafe_view"(%931, %932) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%934 = torch.aten.add_.Tensor %933, %arg168, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%935 = torch.aten.empty_like %934, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%936 = torch.operator "aten.bernoulli_.float"(%935, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%937 = torch.aten.div_.Scalar %936, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%938 = torch.aten.mul.Tensor %934, %937 : !torch.tensor, !torch.tensor -> !torch.tensor
%939 = torch.aten.add.Tensor %938, %result0_45, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%940 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %939, %940, %arg167, %arg166, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%941 = torch.aten.t %arg177 : !torch.tensor -> !torch.tensor
%942 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%943 = torch.aten.view %result0_48, %942 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%944 = torch.aten.mm %943, %941 : !torch.tensor, !torch.tensor -> !torch.tensor
%945 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%946 = torch.operator "aten._unsafe_view"(%944, %945) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%947 = torch.aten.add_.Tensor %946, %arg176, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%948 = torch.aten.gelu %947 : !torch.tensor -> !torch.tensor
%949 = torch.aten.t %arg181 : !torch.tensor -> !torch.tensor
%950 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%951 = torch.aten.view %948, %950 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%952 = torch.aten.mm %951, %949 : !torch.tensor, !torch.tensor -> !torch.tensor
%953 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%954 = torch.operator "aten._unsafe_view"(%952, %953) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%955 = torch.aten.add_.Tensor %954, %arg180, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%956 = torch.aten.empty_like %955, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%957 = torch.operator "aten.bernoulli_.float"(%956, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%958 = torch.aten.div_.Scalar %957, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%959 = torch.aten.mul.Tensor %955, %958 : !torch.tensor, !torch.tensor -> !torch.tensor
%960 = torch.aten.add.Tensor %959, %result0_48, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%961 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %960, %961, %arg179, %arg178, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%962 = torch.aten.t %arg189 : !torch.tensor -> !torch.tensor
%963 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%964 = torch.aten.view %result0_51, %963 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%965 = torch.aten.mm %964, %962 : !torch.tensor, !torch.tensor -> !torch.tensor
%966 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%967 = torch.operator "aten._unsafe_view"(%965, %966) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%968 = torch.aten.add_.Tensor %967, %arg188, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%969 = torch.aten.t %arg187 : !torch.tensor -> !torch.tensor
%970 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%971 = torch.aten.view %result0_51, %970 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%972 = torch.aten.mm %971, %969 : !torch.tensor, !torch.tensor -> !torch.tensor
%973 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%974 = torch.operator "aten._unsafe_view"(%972, %973) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%975 = torch.aten.add_.Tensor %974, %arg186, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%976 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%977 = torch.aten.view %975, %976 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%978 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%979 = torch.aten.permute %977, %978 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%980 = torch.aten.t %arg191 : !torch.tensor -> !torch.tensor
%981 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%982 = torch.aten.view %result0_51, %981 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%983 = torch.aten.mm %982, %980 : !torch.tensor, !torch.tensor -> !torch.tensor
%984 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%985 = torch.operator "aten._unsafe_view"(%983, %984) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%986 = torch.aten.add_.Tensor %985, %arg190, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%987 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%988 = torch.aten.view %986, %987 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%989 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%990 = torch.aten.permute %988, %989 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%991 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%992 = torch.aten.view %968, %991 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%993 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%994 = torch.aten.permute %992, %993 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%995 = torch.aten.transpose.int %979, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%996 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%997 = torch.aten.expand %994, %996, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%998 = torch.operator "aten.clone"(%997, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%999 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1000 = torch.operator "aten._unsafe_view"(%998, %999) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1001 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1002 = torch.aten.expand %995, %1001, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1003 = torch.operator "aten.clone"(%1002, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1004 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1005 = torch.operator "aten._unsafe_view"(%1003, %1004) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1006 = torch.aten.bmm %1000, %1005 : !torch.tensor, !torch.tensor -> !torch.tensor
%1007 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1008 = torch.operator "aten._unsafe_view"(%1006, %1007) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1009 = torch.aten.div.Tensor %1008, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1010 = torch.aten.add.Tensor %1009, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1011 = torch.aten._softmax %1010, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1012 = torch.aten.empty_like %1011, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1013 = torch.operator "aten.bernoulli_.float"(%1012, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1014 = torch.aten.div_.Scalar %1013, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1015 = torch.aten.mul.Tensor %1011, %1014 : !torch.tensor, !torch.tensor -> !torch.tensor
%1016 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1017 = torch.aten.expand %1015, %1016, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1018 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1019 = torch.aten.view %1017, %1018 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1020 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1021 = torch.aten.expand %990, %1020, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1022 = torch.operator "aten.clone"(%1021, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1023 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1024 = torch.operator "aten._unsafe_view"(%1022, %1023) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1025 = torch.aten.bmm %1019, %1024 : !torch.tensor, !torch.tensor -> !torch.tensor
%1026 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1027 = torch.operator "aten._unsafe_view"(%1025, %1026) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1028 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1029 = torch.aten.permute %1027, %1028 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1030 = torch.operator "aten.clone"(%1029, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1031 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1032 = torch.aten.view %1030, %1031 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1033 = torch.aten.t %arg185 : !torch.tensor -> !torch.tensor
%1034 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1035 = torch.aten.view %1032, %1034 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1036 = torch.aten.mm %1035, %1033 : !torch.tensor, !torch.tensor -> !torch.tensor
%1037 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1038 = torch.operator "aten._unsafe_view"(%1036, %1037) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1039 = torch.aten.add_.Tensor %1038, %arg184, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1040 = torch.aten.empty_like %1039, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1041 = torch.operator "aten.bernoulli_.float"(%1040, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1042 = torch.aten.div_.Scalar %1041, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1043 = torch.aten.mul.Tensor %1039, %1042 : !torch.tensor, !torch.tensor -> !torch.tensor
%1044 = torch.aten.add.Tensor %1043, %result0_51, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1045 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_54, %result1_55, %result2_56 = torch.aten.native_layer_norm %1044, %1045, %arg183, %arg182, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1046 = torch.aten.t %arg193 : !torch.tensor -> !torch.tensor
%1047 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1048 = torch.aten.view %result0_54, %1047 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1049 = torch.aten.mm %1048, %1046 : !torch.tensor, !torch.tensor -> !torch.tensor
%1050 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1051 = torch.operator "aten._unsafe_view"(%1049, %1050) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1052 = torch.aten.add_.Tensor %1051, %arg192, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1053 = torch.aten.gelu %1052 : !torch.tensor -> !torch.tensor
%1054 = torch.aten.t %arg197 : !torch.tensor -> !torch.tensor
%1055 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1056 = torch.aten.view %1053, %1055 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1057 = torch.aten.mm %1056, %1054 : !torch.tensor, !torch.tensor -> !torch.tensor
%1058 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1059 = torch.operator "aten._unsafe_view"(%1057, %1058) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1060 = torch.aten.add_.Tensor %1059, %arg196, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1061 = torch.aten.empty_like %1060, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1062 = torch.operator "aten.bernoulli_.float"(%1061, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1063 = torch.aten.div_.Scalar %1062, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1064 = torch.aten.mul.Tensor %1060, %1063 : !torch.tensor, !torch.tensor -> !torch.tensor
%1065 = torch.aten.add.Tensor %1064, %result0_54, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1066 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_57, %result1_58, %result2_59 = torch.aten.native_layer_norm %1065, %1066, %arg195, %arg194, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1067 = torch.aten.t %arg45 : !torch.tensor -> !torch.tensor
%1068 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1069 = torch.aten.view %result0_57, %1068 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1070 = torch.aten.mm %1069, %1067 : !torch.tensor, !torch.tensor -> !torch.tensor
%1071 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1072 = torch.operator "aten._unsafe_view"(%1070, %1071) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1073 = torch.aten.add_.Tensor %1072, %arg44, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1074 = torch.aten.t %arg43 : !torch.tensor -> !torch.tensor
%1075 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1076 = torch.aten.view %result0_57, %1075 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1077 = torch.aten.mm %1076, %1074 : !torch.tensor, !torch.tensor -> !torch.tensor
%1078 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1079 = torch.operator "aten._unsafe_view"(%1077, %1078) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1080 = torch.aten.add_.Tensor %1079, %arg42, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1081 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1082 = torch.aten.view %1080, %1081 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1083 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1084 = torch.aten.permute %1082, %1083 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1085 = torch.aten.t %arg47 : !torch.tensor -> !torch.tensor
%1086 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1087 = torch.aten.view %result0_57, %1086 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1088 = torch.aten.mm %1087, %1085 : !torch.tensor, !torch.tensor -> !torch.tensor
%1089 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1090 = torch.operator "aten._unsafe_view"(%1088, %1089) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1091 = torch.aten.add_.Tensor %1090, %arg46, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1092 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1093 = torch.aten.view %1091, %1092 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1094 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1095 = torch.aten.permute %1093, %1094 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1096 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1097 = torch.aten.view %1073, %1096 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1098 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1099 = torch.aten.permute %1097, %1098 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1100 = torch.aten.transpose.int %1084, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1101 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1102 = torch.aten.expand %1099, %1101, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1103 = torch.operator "aten.clone"(%1102, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1104 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1105 = torch.operator "aten._unsafe_view"(%1103, %1104) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1106 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1107 = torch.aten.expand %1100, %1106, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1108 = torch.operator "aten.clone"(%1107, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1109 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1110 = torch.operator "aten._unsafe_view"(%1108, %1109) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1111 = torch.aten.bmm %1105, %1110 : !torch.tensor, !torch.tensor -> !torch.tensor
%1112 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1113 = torch.operator "aten._unsafe_view"(%1111, %1112) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1114 = torch.aten.div.Tensor %1113, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1115 = torch.aten.add.Tensor %1114, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1116 = torch.aten._softmax %1115, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1117 = torch.aten.empty_like %1116, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1118 = torch.operator "aten.bernoulli_.float"(%1117, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1119 = torch.aten.div_.Scalar %1118, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1120 = torch.aten.mul.Tensor %1116, %1119 : !torch.tensor, !torch.tensor -> !torch.tensor
%1121 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1122 = torch.aten.expand %1120, %1121, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1123 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1124 = torch.aten.view %1122, %1123 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1125 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1126 = torch.aten.expand %1095, %1125, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1127 = torch.operator "aten.clone"(%1126, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1128 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1129 = torch.operator "aten._unsafe_view"(%1127, %1128) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1130 = torch.aten.bmm %1124, %1129 : !torch.tensor, !torch.tensor -> !torch.tensor
%1131 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1132 = torch.operator "aten._unsafe_view"(%1130, %1131) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1133 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1134 = torch.aten.permute %1132, %1133 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1135 = torch.operator "aten.clone"(%1134, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1136 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1137 = torch.aten.view %1135, %1136 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1138 = torch.aten.t %arg41 : !torch.tensor -> !torch.tensor
%1139 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1140 = torch.aten.view %1137, %1139 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1141 = torch.aten.mm %1140, %1138 : !torch.tensor, !torch.tensor -> !torch.tensor
%1142 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1143 = torch.operator "aten._unsafe_view"(%1141, %1142) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1144 = torch.aten.add_.Tensor %1143, %arg40, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1145 = torch.aten.empty_like %1144, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1146 = torch.operator "aten.bernoulli_.float"(%1145, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1147 = torch.aten.div_.Scalar %1146, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1148 = torch.aten.mul.Tensor %1144, %1147 : !torch.tensor, !torch.tensor -> !torch.tensor
%1149 = torch.aten.add.Tensor %1148, %result0_57, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1150 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_60, %result1_61, %result2_62 = torch.aten.native_layer_norm %1149, %1150, %arg39, %arg38, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1151 = torch.aten.t %arg49 : !torch.tensor -> !torch.tensor
%1152 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1153 = torch.aten.view %result0_60, %1152 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1154 = torch.aten.mm %1153, %1151 : !torch.tensor, !torch.tensor -> !torch.tensor
%1155 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1156 = torch.operator "aten._unsafe_view"(%1154, %1155) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1157 = torch.aten.add_.Tensor %1156, %arg48, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1158 = torch.aten.gelu %1157 : !torch.tensor -> !torch.tensor
%1159 = torch.aten.t %arg53 : !torch.tensor -> !torch.tensor
%1160 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1161 = torch.aten.view %1158, %1160 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1162 = torch.aten.mm %1161, %1159 : !torch.tensor, !torch.tensor -> !torch.tensor
%1163 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1164 = torch.operator "aten._unsafe_view"(%1162, %1163) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1165 = torch.aten.add_.Tensor %1164, %arg52, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1166 = torch.aten.empty_like %1165, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1167 = torch.operator "aten.bernoulli_.float"(%1166, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1168 = torch.aten.div_.Scalar %1167, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1169 = torch.aten.mul.Tensor %1165, %1168 : !torch.tensor, !torch.tensor -> !torch.tensor
%1170 = torch.aten.add.Tensor %1169, %result0_60, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1171 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_63, %result1_64, %result2_65 = torch.aten.native_layer_norm %1170, %1171, %arg51, %arg50, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1172 = torch.aten.t %arg61 : !torch.tensor -> !torch.tensor
%1173 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1174 = torch.aten.view %result0_63, %1173 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1175 = torch.aten.mm %1174, %1172 : !torch.tensor, !torch.tensor -> !torch.tensor
%1176 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1177 = torch.operator "aten._unsafe_view"(%1175, %1176) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1178 = torch.aten.add_.Tensor %1177, %arg60, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1179 = torch.aten.t %arg59 : !torch.tensor -> !torch.tensor
%1180 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1181 = torch.aten.view %result0_63, %1180 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1182 = torch.aten.mm %1181, %1179 : !torch.tensor, !torch.tensor -> !torch.tensor
%1183 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1184 = torch.operator "aten._unsafe_view"(%1182, %1183) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1185 = torch.aten.add_.Tensor %1184, %arg58, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1186 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1187 = torch.aten.view %1185, %1186 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1188 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1189 = torch.aten.permute %1187, %1188 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1190 = torch.aten.t %arg63 : !torch.tensor -> !torch.tensor
%1191 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1192 = torch.aten.view %result0_63, %1191 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1193 = torch.aten.mm %1192, %1190 : !torch.tensor, !torch.tensor -> !torch.tensor
%1194 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1195 = torch.operator "aten._unsafe_view"(%1193, %1194) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1196 = torch.aten.add_.Tensor %1195, %arg62, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1197 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1198 = torch.aten.view %1196, %1197 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1199 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1200 = torch.aten.permute %1198, %1199 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1201 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1202 = torch.aten.view %1178, %1201 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1203 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1204 = torch.aten.permute %1202, %1203 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1205 = torch.aten.transpose.int %1189, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1206 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1207 = torch.aten.expand %1204, %1206, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1208 = torch.operator "aten.clone"(%1207, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1209 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1210 = torch.operator "aten._unsafe_view"(%1208, %1209) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1211 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1212 = torch.aten.expand %1205, %1211, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1213 = torch.operator "aten.clone"(%1212, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1214 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1215 = torch.operator "aten._unsafe_view"(%1213, %1214) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1216 = torch.aten.bmm %1210, %1215 : !torch.tensor, !torch.tensor -> !torch.tensor
%1217 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1218 = torch.operator "aten._unsafe_view"(%1216, %1217) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1219 = torch.aten.div.Tensor %1218, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1220 = torch.aten.add.Tensor %1219, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1221 = torch.aten._softmax %1220, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1222 = torch.aten.empty_like %1221, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1223 = torch.operator "aten.bernoulli_.float"(%1222, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1224 = torch.aten.div_.Scalar %1223, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1225 = torch.aten.mul.Tensor %1221, %1224 : !torch.tensor, !torch.tensor -> !torch.tensor
%1226 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1227 = torch.aten.expand %1225, %1226, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1228 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1229 = torch.aten.view %1227, %1228 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1230 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1231 = torch.aten.expand %1200, %1230, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1232 = torch.operator "aten.clone"(%1231, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1233 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1234 = torch.operator "aten._unsafe_view"(%1232, %1233) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1235 = torch.aten.bmm %1229, %1234 : !torch.tensor, !torch.tensor -> !torch.tensor
%1236 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1237 = torch.operator "aten._unsafe_view"(%1235, %1236) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1238 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1239 = torch.aten.permute %1237, %1238 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1240 = torch.operator "aten.clone"(%1239, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1241 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1242 = torch.aten.view %1240, %1241 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1243 = torch.aten.t %arg57 : !torch.tensor -> !torch.tensor
%1244 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1245 = torch.aten.view %1242, %1244 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1246 = torch.aten.mm %1245, %1243 : !torch.tensor, !torch.tensor -> !torch.tensor
%1247 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1248 = torch.operator "aten._unsafe_view"(%1246, %1247) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1249 = torch.aten.add_.Tensor %1248, %arg56, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1250 = torch.aten.empty_like %1249, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1251 = torch.operator "aten.bernoulli_.float"(%1250, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1252 = torch.aten.div_.Scalar %1251, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1253 = torch.aten.mul.Tensor %1249, %1252 : !torch.tensor, !torch.tensor -> !torch.tensor
%1254 = torch.aten.add.Tensor %1253, %result0_63, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1255 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_66, %result1_67, %result2_68 = torch.aten.native_layer_norm %1254, %1255, %arg55, %arg54, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1256 = torch.aten.t %arg65 : !torch.tensor -> !torch.tensor
%1257 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1258 = torch.aten.view %result0_66, %1257 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1259 = torch.aten.mm %1258, %1256 : !torch.tensor, !torch.tensor -> !torch.tensor
%1260 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1261 = torch.operator "aten._unsafe_view"(%1259, %1260) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1262 = torch.aten.add_.Tensor %1261, %arg64, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1263 = torch.aten.gelu %1262 : !torch.tensor -> !torch.tensor
%1264 = torch.aten.t %arg69 : !torch.tensor -> !torch.tensor
%1265 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1266 = torch.aten.view %1263, %1265 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1267 = torch.aten.mm %1266, %1264 : !torch.tensor, !torch.tensor -> !torch.tensor
%1268 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1269 = torch.operator "aten._unsafe_view"(%1267, %1268) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1270 = torch.aten.add_.Tensor %1269, %arg68, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1271 = torch.aten.empty_like %1270, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1272 = torch.operator "aten.bernoulli_.float"(%1271, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1273 = torch.aten.div_.Scalar %1272, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1274 = torch.aten.mul.Tensor %1270, %1273 : !torch.tensor, !torch.tensor -> !torch.tensor
%1275 = torch.aten.add.Tensor %1274, %result0_66, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1276 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_69, %result1_70, %result2_71 = torch.aten.native_layer_norm %1275, %1276, %arg67, %arg66, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1277 = torch.aten.t %arg204 : !torch.tensor -> !torch.tensor
%1278 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1279 = torch.aten.view %result0_69, %1278 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1280 = torch.aten.mm %1279, %1277 : !torch.tensor, !torch.tensor -> !torch.tensor
%1281 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1282 = torch.operator "aten._unsafe_view"(%1280, %1281) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1283 = torch.aten.add_.Tensor %1282, %arg203, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1284 = torch.aten.gelu %1283 : !torch.tensor -> !torch.tensor
%1285 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_72, %result1_73, %result2_74 = torch.aten.native_layer_norm %1284, %1285, %arg202, %arg201, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1286 = torch.aten.t %arg200 : !torch.tensor -> !torch.tensor
%1287 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1288 = torch.aten.view %result0_72, %1287 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1289 = torch.aten.mm %1288, %1286 : !torch.tensor, !torch.tensor -> !torch.tensor
%1290 = torch.prim.ListConstruct %int4, %int512, %int30522 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1291 = torch.operator "aten._unsafe_view"(%1289, %1290) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1292 = torch.aten.add_.Tensor %1291, %arg199, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1293 = torch.prim.ListConstruct %int-1, %int30522 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1294 = torch.aten.view %1292, %1293 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1295 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<!torch.int>
%1296 = torch.aten.view %arg208, %1295 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1297 = torch.operator "aten._log_softmax"(%1294, %int1, %false) : (!torch.tensor, !torch.int, !torch.bool) -> !torch.tensor
%output, %total_weight = torch.aten.nll_loss_forward %1297, %1296, %none, %int1, %int-100 : !torch.tensor, !torch.tensor, !torch.none, !torch.int, !torch.int -> !torch.tensor, !torch.tensor
%1298 = torch.aten.transpose.int %1229, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1299 = torch.aten.transpose.int %1234, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1300 = torch.aten.transpose.int %1210, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1301 = torch.aten.transpose.int %1215, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1302 = torch.aten.transpose.int %1124, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1303 = torch.aten.transpose.int %1129, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1304 = torch.aten.transpose.int %1105, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1305 = torch.aten.transpose.int %1110, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1306 = torch.aten.transpose.int %1019, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1307 = torch.aten.transpose.int %1024, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1308 = torch.aten.transpose.int %1000, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1309 = torch.aten.transpose.int %1005, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1310 = torch.aten.transpose.int %914, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1311 = torch.aten.transpose.int %919, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1312 = torch.aten.transpose.int %895, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1313 = torch.aten.transpose.int %900, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1314 = torch.aten.transpose.int %809, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1315 = torch.aten.transpose.int %814, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1316 = torch.aten.transpose.int %790, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1317 = torch.aten.transpose.int %795, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1318 = torch.aten.transpose.int %704, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1319 = torch.aten.transpose.int %709, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1320 = torch.aten.transpose.int %685, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1321 = torch.aten.transpose.int %690, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1322 = torch.aten.transpose.int %599, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1323 = torch.aten.transpose.int %604, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1324 = torch.aten.transpose.int %580, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1325 = torch.aten.transpose.int %585, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1326 = torch.aten.transpose.int %494, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1327 = torch.aten.transpose.int %499, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1328 = torch.aten.transpose.int %475, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1329 = torch.aten.transpose.int %480, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1330 = torch.aten.transpose.int %389, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1331 = torch.aten.transpose.int %394, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1332 = torch.aten.transpose.int %370, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1333 = torch.aten.transpose.int %375, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1334 = torch.aten.transpose.int %284, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1335 = torch.aten.transpose.int %289, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1336 = torch.aten.transpose.int %265, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1337 = torch.aten.transpose.int %270, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1338 = torch.aten.transpose.int %179, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1339 = torch.aten.transpose.int %184, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1340 = torch.aten.transpose.int %160, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1341 = torch.aten.transpose.int %165, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1342 = torch.aten.transpose.int %74, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1343 = torch.aten.transpose.int %79, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1344 = torch.aten.transpose.int %55, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1345 = torch.aten.transpose.int %60, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1346 = torch.prim.ListConstruct %output, %1292, %307, %result2_20, %arg99, %result2_50, %1014, %1275, %arg37, %result1_16, %825, %591, %1076, %arg121, %arg38, %446, %result2_5, %arg162, %arg27, %1338, %1245, %result2_11, %result1_7, %arg101, %1224, %arg77, %arg137, %124, %arg95, %1297, %510, %result1_10, %arg117, %321, %594, %733, %arg178, %arg17, %540, %arg134, %arg50, %964, %result1_52, %arg71, %720, %arg41, %arg34, %arg175, %result2_2, %arg61, %arg54, %5, %1116, %204, %arg119, %arg66, %arg201, %result1_37, %arg31, %arg91, %1322, %457, %1330, %result1_46, %1153, %1314, %arg1, %97, %1332, %arg113, %656, %result2_35, %arg67, %arg131, %arg39, %arg195, %arg167, %1221, %334, %26, %1252, %328, %arg150, %result1_34, %517, %1301, %628, %519, %853, %arg189, %result1_4, %result2_56, %1334, %1303, %arg159, %906, %arg45, %279, %439, %212, %result2_41, %1325, %855, %arg153, %1335, %859, %arg82, %877, %489, %arg29, %result2_44, %arg182, %118, %result1_22, %1087, %arg151, %result1_49, %381, %arg146, %1316, %arg141, %1308, %6, %result2_47, %arg193, %947, %1324, %arg93, %622, %arg22, %1326, %arg43, %arg187, %result2_53, %615, %1340, %216, %982, %result2_23, %result1_28, %15, %arg139, %arg194, %arg155, %arg204, %66, %result1_43, %arg47, %667, %1056, %1310, %arg147, %result1_73, %1157, %1329, %arg183, %arg181, %1319, %741, %1300, %384, %1328, %804, %arg105, %1323, %69, %195, %1052, %1266, %1140, %arg18, %result2_38, %223, %300, %arg107, %645, %208, %result1_70, %832, %433, %171, %arg86, %939, %958, %1344, %result1, %562, %1304, %arg75, %1069, %90, %486, %arg65, %arg191, %418, %1147, %313, %arg161, %arg125, %1339, %result1_13, %result2_68, %arg165, %arg19, %426, %1342, %971, %arg69, %result2_8, %arg102, %result2_17, %arg2, %943, %1279, %arg89, %276, %412, %435, %1313, %arg129, %1315, %arg81, %750, %arg63, %1011, %1161, %1273, %1174, %1299, %arg111, %result1_55, %544, %801, %arg185, %930, %arg179, %arg114, %arg49, %699, %1288, %result1_31, %1309, %1044, %1048, %1262, %1305, %19, 
%result2_26, %arg98, %1306, %arg15, %937, %696, %arg85, %229, %761, %174, %result2_71, %arg87, %arg135, %643, %909, %result2_65, %1336, %arg123, %arg55, %arg59, %total_weight, %772, %1333, %result1_67, %1318, %arg130, %1168, %arg133, %247, %754, %arg97, %result1_64, %result2, %1331, %result1_40, %842, %arg21, %result1_25, %1321, %330, %737, %result2_14, %result1_58, %result2_29, %1149, %99, %352, %arg11, %142, %1254, %309, %arg25, %551, %arg109, %1345, %1258, %1343, %arg70, %arg202, %arg207, %414, %arg127, %834, %arg33, %arg57, %846, %624, %538, %arg143, %result1_19, %1302, %result1_1, %202, %1311, %1341, %1327, %37, %arg79, %arg197, %1337, %11, %result2_32, %1312, %649, %422, %236, %727, %arg149, %arg200, %arg177, %arg169, %1063, %1296, %1320, %arg7, %arg163, %951, %1297, %729, %960, %result2_62, %341, %1119, %result2_74, %arg118, %1283, %120, %arg115, %1181, %arg13, %107, %arg73, %1298, %arg53, %1170, %result2_59, %result1_61, %arg51, %arg173, %arg9, %632, %866, %arg145, %131, %arg35, %1065, %636, %748, %405, %1042, %arg83, %1192, %527, %103, %838, %225, %arg171, %1035, %531, %arg166, %arg23, %arg103, %arg6, %111, %1317, %1307, %arg157, %317, %523 : (!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor) -> !torch.list<!torch.tensor>
return %1346 : !torch.list<!torch.tensor>
}
// Class-type declaration for the symbol
// @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule.
// The body lists a single member: a method named "forward" that refers to the
// function symbol @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule.forward.
// No other members are declared in this body.
torch.class_type @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule {
torch.method "forward", @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule.forward
}
// Creates the module object %0 of type
// !torch.nn.Module<"__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule">.
// The region between the braces is empty, so nothing is initialized on the object here.
// NOTE(review): presumably the weights are supplied as the %argN tensor arguments of
// the forward function instead of being stored on the module -- confirm against the
// producer of this dump.
%0 = torch.nn_module {
} : !torch.nn.Module<"__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule">
}
// -----// IR Dump After PrepareForGlobalizeObjectGraph //----- //
module attributes {torch.debug_module_name = "GraphModule"} {
func private @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule.forward(%arg0: !torch.nn.Module<"__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule">, %arg1: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg2: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg3: !torch.tensor {torch.type_bound = !torch.vtensor<[512,768],f32>}, %arg4: !torch.tensor {torch.type_bound = !torch.vtensor<[2,768],f32>}, %arg5: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg6: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg7: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg8: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg9: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg10: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg11: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg12: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg13: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg14: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg15: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg16: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg17: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg18: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg19: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg20: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg21: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg22: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg23: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg24: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg25: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg26: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768],f32>}, %arg27: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg28: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg29: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg30: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg31: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg32: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg33: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg34: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg35: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg36: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg37: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg38: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg39: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg40: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg41: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg42: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg43: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg44: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg45: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg46: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg47: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg48: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg49: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg50: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg51: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg52: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg53: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg54: !torch.tensor 
{torch.type_bound = !torch.vtensor<[768],f32>}, %arg55: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg56: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg57: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg58: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg59: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg60: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg61: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg62: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg63: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg64: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg65: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg66: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg67: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg68: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg69: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg70: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg71: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg72: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg73: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg74: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg75: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg76: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg77: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg78: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg79: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg80: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg81: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg82: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg83: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg84: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg85: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg86: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg87: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg88: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg89: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg90: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg91: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg92: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg93: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg94: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg95: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg96: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg97: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg98: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg99: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg100: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg101: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg102: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg103: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg104: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg105: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg106: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg107: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg108: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg109: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768,768],f32>}, %arg110: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg111: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg112: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg113: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg114: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg115: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg116: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg117: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg118: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg119: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg120: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg121: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg122: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg123: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg124: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg125: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg126: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg127: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg128: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg129: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg130: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg131: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg132: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg133: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg134: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg135: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg136: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg137: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg138: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg139: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg140: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg141: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg142: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg143: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg144: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg145: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg146: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg147: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg148: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg149: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg150: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg151: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg152: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg153: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg154: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg155: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg156: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg157: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg158: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg159: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg160: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg161: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg162: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg163: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg164: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768],f32>}, %arg165: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg166: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg167: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg168: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg169: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg170: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg171: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg172: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg173: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg174: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg175: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg176: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg177: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg178: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg179: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg180: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg181: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg182: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg183: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg184: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg185: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg186: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg187: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg188: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg189: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg190: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg191: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, 
%arg192: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg193: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg194: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg195: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg196: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg197: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg198: !torch.tensor {torch.type_bound = !torch.vtensor<[30522],f32>}, %arg199: !torch.tensor {torch.type_bound = !torch.vtensor<[30522],f32>}, %arg200: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg201: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg202: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg203: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg204: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg205: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg206: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg207: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}, %arg208: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}) -> !torch.list<!torch.tensor> {
%none = torch.constant.none
%false = torch.constant.bool false
%int0 = torch.constant.int 0
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int4 = torch.constant.int 4
%int512 = torch.constant.int 512
%int768 = torch.constant.int 768
%float9.999990e-13 = torch.constant.float 9.9999999999999998E-13
%float9.000000e-01 = torch.constant.float 9.000000e-01
%int2048 = torch.constant.int 2048
%int12 = torch.constant.int 12
%int64 = torch.constant.int 64
%int2 = torch.constant.int 2
%int1 = torch.constant.int 1
%int3 = torch.constant.int 3
%int-1 = torch.constant.int -1
%int-2 = torch.constant.int -2
%int48 = torch.constant.int 48
%int3072 = torch.constant.int 3072
%int30522 = torch.constant.int 30522
%int-100 = torch.constant.int -100
%1 = torch.tensor.literal(dense<-0.000000e+00> : tensor<4x1x1x512xf32>) : !torch.tensor<[4,1,1,512],f32>
%2 = torch.tensor.literal(dense<8.000000e+00> : tensor<f64>) : !torch.tensor<[],f64>
%3 = torch.aten.slice.Tensor %arg206, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%4 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%5 = torch.aten.expand %3, %4, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%6 = torch.aten.slice.Tensor %arg205, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%7 = torch.aten.embedding %arg5, %arg207, %int0, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%8 = torch.aten.embedding %arg4, %5, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%9 = torch.aten.add.Tensor %7, %8, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%10 = torch.aten.embedding %arg3, %6, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%11 = torch.aten.add_.Tensor %9, %10, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%12 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0, %result1, %result2 = torch.aten.native_layer_norm %11, %12, %arg2, %arg1, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%13 = torch.aten.empty_like %result0, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%14 = torch.operator "aten.bernoulli_.float"(%13, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%15 = torch.aten.div_.Scalar %14, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%16 = torch.aten.mul.Tensor %result0, %15 : !torch.tensor, !torch.tensor -> !torch.tensor
%17 = torch.aten.t %arg13 : !torch.tensor -> !torch.tensor
%18 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%19 = torch.aten.view %16, %18 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%20 = torch.aten.mm %19, %17 : !torch.tensor, !torch.tensor -> !torch.tensor
%21 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%22 = torch.operator "aten._unsafe_view"(%20, %21) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%23 = torch.aten.add_.Tensor %22, %arg12, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%24 = torch.aten.t %arg11 : !torch.tensor -> !torch.tensor
%25 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%26 = torch.aten.view %16, %25 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%27 = torch.aten.mm %26, %24 : !torch.tensor, !torch.tensor -> !torch.tensor
%28 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%29 = torch.operator "aten._unsafe_view"(%27, %28) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%30 = torch.aten.add_.Tensor %29, %arg10, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%31 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%32 = torch.aten.view %30, %31 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%33 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%34 = torch.aten.permute %32, %33 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%35 = torch.aten.t %arg15 : !torch.tensor -> !torch.tensor
%36 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%37 = torch.aten.view %16, %36 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%38 = torch.aten.mm %37, %35 : !torch.tensor, !torch.tensor -> !torch.tensor
%39 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%40 = torch.operator "aten._unsafe_view"(%38, %39) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%41 = torch.aten.add_.Tensor %40, %arg14, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%42 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%43 = torch.aten.view %41, %42 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%44 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%45 = torch.aten.permute %43, %44 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%46 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%47 = torch.aten.view %23, %46 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%48 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%49 = torch.aten.permute %47, %48 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%50 = torch.aten.transpose.int %34, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%51 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%52 = torch.aten.expand %49, %51, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%53 = torch.operator "aten.clone"(%52, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%54 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%55 = torch.operator "aten._unsafe_view"(%53, %54) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%56 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%57 = torch.aten.expand %50, %56, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%58 = torch.operator "aten.clone"(%57, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%59 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%60 = torch.operator "aten._unsafe_view"(%58, %59) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%61 = torch.aten.bmm %55, %60 : !torch.tensor, !torch.tensor -> !torch.tensor
%62 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%63 = torch.operator "aten._unsafe_view"(%61, %62) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%64 = torch.aten.div.Tensor %63, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%65 = torch.aten.add.Tensor %64, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%66 = torch.aten._softmax %65, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%67 = torch.aten.empty_like %66, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%68 = torch.operator "aten.bernoulli_.float"(%67, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%69 = torch.aten.div_.Scalar %68, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%70 = torch.aten.mul.Tensor %66, %69 : !torch.tensor, !torch.tensor -> !torch.tensor
%71 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%72 = torch.aten.expand %70, %71, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%73 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%74 = torch.aten.view %72, %73 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%75 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%76 = torch.aten.expand %45, %75, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%77 = torch.operator "aten.clone"(%76, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%78 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%79 = torch.operator "aten._unsafe_view"(%77, %78) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%80 = torch.aten.bmm %74, %79 : !torch.tensor, !torch.tensor -> !torch.tensor
%81 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%82 = torch.operator "aten._unsafe_view"(%80, %81) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%83 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%84 = torch.aten.permute %82, %83 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%85 = torch.operator "aten.clone"(%84, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%86 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%87 = torch.aten.view %85, %86 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%88 = torch.aten.t %arg9 : !torch.tensor -> !torch.tensor
%89 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%90 = torch.aten.view %87, %89 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%91 = torch.aten.mm %90, %88 : !torch.tensor, !torch.tensor -> !torch.tensor
%92 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%93 = torch.operator "aten._unsafe_view"(%91, %92) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%94 = torch.aten.add_.Tensor %93, %arg8, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%95 = torch.aten.empty_like %94, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%96 = torch.operator "aten.bernoulli_.float"(%95, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%97 = torch.aten.div_.Scalar %96, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%98 = torch.aten.mul.Tensor %94, %97 : !torch.tensor, !torch.tensor -> !torch.tensor
%99 = torch.aten.add.Tensor %98, %16, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%100 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_0, %result1_1, %result2_2 = torch.aten.native_layer_norm %99, %100, %arg7, %arg6, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%101 = torch.aten.t %arg17 : !torch.tensor -> !torch.tensor
%102 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%103 = torch.aten.view %result0_0, %102 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%104 = torch.aten.mm %103, %101 : !torch.tensor, !torch.tensor -> !torch.tensor
%105 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%106 = torch.operator "aten._unsafe_view"(%104, %105) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%107 = torch.aten.add_.Tensor %106, %arg16, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%108 = torch.aten.gelu %107 : !torch.tensor -> !torch.tensor
%109 = torch.aten.t %arg21 : !torch.tensor -> !torch.tensor
%110 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%111 = torch.aten.view %108, %110 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%112 = torch.aten.mm %111, %109 : !torch.tensor, !torch.tensor -> !torch.tensor
%113 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%114 = torch.operator "aten._unsafe_view"(%112, %113) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%115 = torch.aten.add_.Tensor %114, %arg20, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%116 = torch.aten.empty_like %115, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%117 = torch.operator "aten.bernoulli_.float"(%116, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%118 = torch.aten.div_.Scalar %117, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%119 = torch.aten.mul.Tensor %115, %118 : !torch.tensor, !torch.tensor -> !torch.tensor
%120 = torch.aten.add.Tensor %119, %result0_0, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%121 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_3, %result1_4, %result2_5 = torch.aten.native_layer_norm %120, %121, %arg19, %arg18, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%122 = torch.aten.t %arg29 : !torch.tensor -> !torch.tensor
%123 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%124 = torch.aten.view %result0_3, %123 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%125 = torch.aten.mm %124, %122 : !torch.tensor, !torch.tensor -> !torch.tensor
%126 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%127 = torch.operator "aten._unsafe_view"(%125, %126) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%128 = torch.aten.add_.Tensor %127, %arg28, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%129 = torch.aten.t %arg27 : !torch.tensor -> !torch.tensor
%130 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%131 = torch.aten.view %result0_3, %130 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%132 = torch.aten.mm %131, %129 : !torch.tensor, !torch.tensor -> !torch.tensor
%133 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%134 = torch.operator "aten._unsafe_view"(%132, %133) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%135 = torch.aten.add_.Tensor %134, %arg26, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%136 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%137 = torch.aten.view %135, %136 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%138 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%139 = torch.aten.permute %137, %138 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%140 = torch.aten.t %arg31 : !torch.tensor -> !torch.tensor
%141 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%142 = torch.aten.view %result0_3, %141 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%143 = torch.aten.mm %142, %140 : !torch.tensor, !torch.tensor -> !torch.tensor
%144 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%145 = torch.operator "aten._unsafe_view"(%143, %144) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%146 = torch.aten.add_.Tensor %145, %arg30, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%147 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%148 = torch.aten.view %146, %147 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%149 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%150 = torch.aten.permute %148, %149 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%151 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%152 = torch.aten.view %128, %151 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%153 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%154 = torch.aten.permute %152, %153 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%155 = torch.aten.transpose.int %139, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%156 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%157 = torch.aten.expand %154, %156, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%158 = torch.operator "aten.clone"(%157, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%159 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%160 = torch.operator "aten._unsafe_view"(%158, %159) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%161 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%162 = torch.aten.expand %155, %161, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%163 = torch.operator "aten.clone"(%162, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%164 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%165 = torch.operator "aten._unsafe_view"(%163, %164) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%166 = torch.aten.bmm %160, %165 : !torch.tensor, !torch.tensor -> !torch.tensor
%167 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%168 = torch.operator "aten._unsafe_view"(%166, %167) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%169 = torch.aten.div.Tensor %168, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%170 = torch.aten.add.Tensor %169, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%171 = torch.aten._softmax %170, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%172 = torch.aten.empty_like %171, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%173 = torch.operator "aten.bernoulli_.float"(%172, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%174 = torch.aten.div_.Scalar %173, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%175 = torch.aten.mul.Tensor %171, %174 : !torch.tensor, !torch.tensor -> !torch.tensor
%176 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%177 = torch.aten.expand %175, %176, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%178 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%179 = torch.aten.view %177, %178 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%180 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%181 = torch.aten.expand %150, %180, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%182 = torch.operator "aten.clone"(%181, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%183 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%184 = torch.operator "aten._unsafe_view"(%182, %183) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%185 = torch.aten.bmm %179, %184 : !torch.tensor, !torch.tensor -> !torch.tensor
%186 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%187 = torch.operator "aten._unsafe_view"(%185, %186) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%188 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%189 = torch.aten.permute %187, %188 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%190 = torch.operator "aten.clone"(%189, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%191 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%192 = torch.aten.view %190, %191 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%193 = torch.aten.t %arg25 : !torch.tensor -> !torch.tensor
%194 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%195 = torch.aten.view %192, %194 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%196 = torch.aten.mm %195, %193 : !torch.tensor, !torch.tensor -> !torch.tensor
%197 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%198 = torch.operator "aten._unsafe_view"(%196, %197) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%199 = torch.aten.add_.Tensor %198, %arg24, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%200 = torch.aten.empty_like %199, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%201 = torch.operator "aten.bernoulli_.float"(%200, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%202 = torch.aten.div_.Scalar %201, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%203 = torch.aten.mul.Tensor %199, %202 : !torch.tensor, !torch.tensor -> !torch.tensor
%204 = torch.aten.add.Tensor %203, %result0_3, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%205 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_6, %result1_7, %result2_8 = torch.aten.native_layer_norm %204, %205, %arg23, %arg22, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%206 = torch.aten.t %arg33 : !torch.tensor -> !torch.tensor
%207 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%208 = torch.aten.view %result0_6, %207 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%209 = torch.aten.mm %208, %206 : !torch.tensor, !torch.tensor -> !torch.tensor
%210 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%211 = torch.operator "aten._unsafe_view"(%209, %210) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%212 = torch.aten.add_.Tensor %211, %arg32, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%213 = torch.aten.gelu %212 : !torch.tensor -> !torch.tensor
%214 = torch.aten.t %arg37 : !torch.tensor -> !torch.tensor
%215 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%216 = torch.aten.view %213, %215 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%217 = torch.aten.mm %216, %214 : !torch.tensor, !torch.tensor -> !torch.tensor
%218 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%219 = torch.operator "aten._unsafe_view"(%217, %218) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%220 = torch.aten.add_.Tensor %219, %arg36, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%221 = torch.aten.empty_like %220, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%222 = torch.operator "aten.bernoulli_.float"(%221, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%223 = torch.aten.div_.Scalar %222, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%224 = torch.aten.mul.Tensor %220, %223 : !torch.tensor, !torch.tensor -> !torch.tensor
%225 = torch.aten.add.Tensor %224, %result0_6, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%226 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_9, %result1_10, %result2_11 = torch.aten.native_layer_norm %225, %226, %arg35, %arg34, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%227 = torch.aten.t %arg77 : !torch.tensor -> !torch.tensor
%228 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%229 = torch.aten.view %result0_9, %228 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%230 = torch.aten.mm %229, %227 : !torch.tensor, !torch.tensor -> !torch.tensor
%231 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%232 = torch.operator "aten._unsafe_view"(%230, %231) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%233 = torch.aten.add_.Tensor %232, %arg76, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%234 = torch.aten.t %arg75 : !torch.tensor -> !torch.tensor
%235 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%236 = torch.aten.view %result0_9, %235 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%237 = torch.aten.mm %236, %234 : !torch.tensor, !torch.tensor -> !torch.tensor
%238 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%239 = torch.operator "aten._unsafe_view"(%237, %238) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%240 = torch.aten.add_.Tensor %239, %arg74, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%241 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%242 = torch.aten.view %240, %241 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%243 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%244 = torch.aten.permute %242, %243 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%245 = torch.aten.t %arg79 : !torch.tensor -> !torch.tensor
%246 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%247 = torch.aten.view %result0_9, %246 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%248 = torch.aten.mm %247, %245 : !torch.tensor, !torch.tensor -> !torch.tensor
%249 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%250 = torch.operator "aten._unsafe_view"(%248, %249) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%251 = torch.aten.add_.Tensor %250, %arg78, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%252 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%253 = torch.aten.view %251, %252 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%254 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%255 = torch.aten.permute %253, %254 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%256 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%257 = torch.aten.view %233, %256 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%258 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%259 = torch.aten.permute %257, %258 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%260 = torch.aten.transpose.int %244, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%261 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%262 = torch.aten.expand %259, %261, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%263 = torch.operator "aten.clone"(%262, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%264 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%265 = torch.operator "aten._unsafe_view"(%263, %264) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%266 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%267 = torch.aten.expand %260, %266, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%268 = torch.operator "aten.clone"(%267, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%269 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%270 = torch.operator "aten._unsafe_view"(%268, %269) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%271 = torch.aten.bmm %265, %270 : !torch.tensor, !torch.tensor -> !torch.tensor
%272 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%273 = torch.operator "aten._unsafe_view"(%271, %272) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%274 = torch.aten.div.Tensor %273, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%275 = torch.aten.add.Tensor %274, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%276 = torch.aten._softmax %275, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%277 = torch.aten.empty_like %276, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%278 = torch.operator "aten.bernoulli_.float"(%277, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%279 = torch.aten.div_.Scalar %278, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%280 = torch.aten.mul.Tensor %276, %279 : !torch.tensor, !torch.tensor -> !torch.tensor
%281 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%282 = torch.aten.expand %280, %281, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%283 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%284 = torch.aten.view %282, %283 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%285 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%286 = torch.aten.expand %255, %285, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%287 = torch.operator "aten.clone"(%286, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%288 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%289 = torch.operator "aten._unsafe_view"(%287, %288) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%290 = torch.aten.bmm %284, %289 : !torch.tensor, !torch.tensor -> !torch.tensor
%291 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%292 = torch.operator "aten._unsafe_view"(%290, %291) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%293 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%294 = torch.aten.permute %292, %293 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%295 = torch.operator "aten.clone"(%294, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%296 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%297 = torch.aten.view %295, %296 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%298 = torch.aten.t %arg73 : !torch.tensor -> !torch.tensor
%299 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%300 = torch.aten.view %297, %299 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%301 = torch.aten.mm %300, %298 : !torch.tensor, !torch.tensor -> !torch.tensor
%302 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%303 = torch.operator "aten._unsafe_view"(%301, %302) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%304 = torch.aten.add_.Tensor %303, %arg72, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%305 = torch.aten.empty_like %304, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%306 = torch.operator "aten.bernoulli_.float"(%305, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%307 = torch.aten.div_.Scalar %306, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%308 = torch.aten.mul.Tensor %304, %307 : !torch.tensor, !torch.tensor -> !torch.tensor
%309 = torch.aten.add.Tensor %308, %result0_9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%310 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_12, %result1_13, %result2_14 = torch.aten.native_layer_norm %309, %310, %arg71, %arg70, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%311 = torch.aten.t %arg81 : !torch.tensor -> !torch.tensor
%312 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%313 = torch.aten.view %result0_12, %312 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%314 = torch.aten.mm %313, %311 : !torch.tensor, !torch.tensor -> !torch.tensor
%315 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%316 = torch.operator "aten._unsafe_view"(%314, %315) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%317 = torch.aten.add_.Tensor %316, %arg80, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%318 = torch.aten.gelu %317 : !torch.tensor -> !torch.tensor
%319 = torch.aten.t %arg85 : !torch.tensor -> !torch.tensor
%320 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%321 = torch.aten.view %318, %320 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%322 = torch.aten.mm %321, %319 : !torch.tensor, !torch.tensor -> !torch.tensor
%323 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%324 = torch.operator "aten._unsafe_view"(%322, %323) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%325 = torch.aten.add_.Tensor %324, %arg84, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%326 = torch.aten.empty_like %325, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%327 = torch.operator "aten.bernoulli_.float"(%326, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%328 = torch.aten.div_.Scalar %327, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%329 = torch.aten.mul.Tensor %325, %328 : !torch.tensor, !torch.tensor -> !torch.tensor
%330 = torch.aten.add.Tensor %329, %result0_12, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%331 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_15, %result1_16, %result2_17 = torch.aten.native_layer_norm %330, %331, %arg83, %arg82, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%332 = torch.aten.t %arg93 : !torch.tensor -> !torch.tensor
%333 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%334 = torch.aten.view %result0_15, %333 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%335 = torch.aten.mm %334, %332 : !torch.tensor, !torch.tensor -> !torch.tensor
%336 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%337 = torch.operator "aten._unsafe_view"(%335, %336) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%338 = torch.aten.add_.Tensor %337, %arg92, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%339 = torch.aten.t %arg91 : !torch.tensor -> !torch.tensor
%340 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%341 = torch.aten.view %result0_15, %340 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%342 = torch.aten.mm %341, %339 : !torch.tensor, !torch.tensor -> !torch.tensor
%343 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%344 = torch.operator "aten._unsafe_view"(%342, %343) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%345 = torch.aten.add_.Tensor %344, %arg90, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%346 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%347 = torch.aten.view %345, %346 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%348 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%349 = torch.aten.permute %347, %348 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%350 = torch.aten.t %arg95 : !torch.tensor -> !torch.tensor
%351 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%352 = torch.aten.view %result0_15, %351 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%353 = torch.aten.mm %352, %350 : !torch.tensor, !torch.tensor -> !torch.tensor
%354 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%355 = torch.operator "aten._unsafe_view"(%353, %354) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%356 = torch.aten.add_.Tensor %355, %arg94, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%357 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%358 = torch.aten.view %356, %357 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%359 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%360 = torch.aten.permute %358, %359 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%361 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%362 = torch.aten.view %338, %361 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%363 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%364 = torch.aten.permute %362, %363 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%365 = torch.aten.transpose.int %349, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%366 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%367 = torch.aten.expand %364, %366, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%368 = torch.operator "aten.clone"(%367, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%369 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%370 = torch.operator "aten._unsafe_view"(%368, %369) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%371 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%372 = torch.aten.expand %365, %371, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%373 = torch.operator "aten.clone"(%372, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%374 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%375 = torch.operator "aten._unsafe_view"(%373, %374) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%376 = torch.aten.bmm %370, %375 : !torch.tensor, !torch.tensor -> !torch.tensor
%377 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%378 = torch.operator "aten._unsafe_view"(%376, %377) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%379 = torch.aten.div.Tensor %378, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%380 = torch.aten.add.Tensor %379, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%381 = torch.aten._softmax %380, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%382 = torch.aten.empty_like %381, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%383 = torch.operator "aten.bernoulli_.float"(%382, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%384 = torch.aten.div_.Scalar %383, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%385 = torch.aten.mul.Tensor %381, %384 : !torch.tensor, !torch.tensor -> !torch.tensor
%386 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%387 = torch.aten.expand %385, %386, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%388 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%389 = torch.aten.view %387, %388 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%390 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%391 = torch.aten.expand %360, %390, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%392 = torch.operator "aten.clone"(%391, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%393 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%394 = torch.operator "aten._unsafe_view"(%392, %393) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%395 = torch.aten.bmm %389, %394 : !torch.tensor, !torch.tensor -> !torch.tensor
%396 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%397 = torch.operator "aten._unsafe_view"(%395, %396) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%398 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%399 = torch.aten.permute %397, %398 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%400 = torch.operator "aten.clone"(%399, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%401 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%402 = torch.aten.view %400, %401 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%403 = torch.aten.t %arg89 : !torch.tensor -> !torch.tensor
%404 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%405 = torch.aten.view %402, %404 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%406 = torch.aten.mm %405, %403 : !torch.tensor, !torch.tensor -> !torch.tensor
%407 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%408 = torch.operator "aten._unsafe_view"(%406, %407) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%409 = torch.aten.add_.Tensor %408, %arg88, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%410 = torch.aten.empty_like %409, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%411 = torch.operator "aten.bernoulli_.float"(%410, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%412 = torch.aten.div_.Scalar %411, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%413 = torch.aten.mul.Tensor %409, %412 : !torch.tensor, !torch.tensor -> !torch.tensor
%414 = torch.aten.add.Tensor %413, %result0_15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%415 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_18, %result1_19, %result2_20 = torch.aten.native_layer_norm %414, %415, %arg87, %arg86, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%416 = torch.aten.t %arg97 : !torch.tensor -> !torch.tensor
%417 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%418 = torch.aten.view %result0_18, %417 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%419 = torch.aten.mm %418, %416 : !torch.tensor, !torch.tensor -> !torch.tensor
%420 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%421 = torch.operator "aten._unsafe_view"(%419, %420) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%422 = torch.aten.add_.Tensor %421, %arg96, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%423 = torch.aten.gelu %422 : !torch.tensor -> !torch.tensor
%424 = torch.aten.t %arg101 : !torch.tensor -> !torch.tensor
%425 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%426 = torch.aten.view %423, %425 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%427 = torch.aten.mm %426, %424 : !torch.tensor, !torch.tensor -> !torch.tensor
%428 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%429 = torch.operator "aten._unsafe_view"(%427, %428) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%430 = torch.aten.add_.Tensor %429, %arg100, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%431 = torch.aten.empty_like %430, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%432 = torch.operator "aten.bernoulli_.float"(%431, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%433 = torch.aten.div_.Scalar %432, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%434 = torch.aten.mul.Tensor %430, %433 : !torch.tensor, !torch.tensor -> !torch.tensor
%435 = torch.aten.add.Tensor %434, %result0_18, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%436 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_21, %result1_22, %result2_23 = torch.aten.native_layer_norm %435, %436, %arg99, %arg98, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%437 = torch.aten.t %arg109 : !torch.tensor -> !torch.tensor
%438 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%439 = torch.aten.view %result0_21, %438 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%440 = torch.aten.mm %439, %437 : !torch.tensor, !torch.tensor -> !torch.tensor
%441 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%442 = torch.operator "aten._unsafe_view"(%440, %441) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%443 = torch.aten.add_.Tensor %442, %arg108, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%444 = torch.aten.t %arg107 : !torch.tensor -> !torch.tensor
%445 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%446 = torch.aten.view %result0_21, %445 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%447 = torch.aten.mm %446, %444 : !torch.tensor, !torch.tensor -> !torch.tensor
%448 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%449 = torch.operator "aten._unsafe_view"(%447, %448) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%450 = torch.aten.add_.Tensor %449, %arg106, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%451 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%452 = torch.aten.view %450, %451 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%453 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%454 = torch.aten.permute %452, %453 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%455 = torch.aten.t %arg111 : !torch.tensor -> !torch.tensor
%456 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%457 = torch.aten.view %result0_21, %456 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%458 = torch.aten.mm %457, %455 : !torch.tensor, !torch.tensor -> !torch.tensor
%459 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%460 = torch.operator "aten._unsafe_view"(%458, %459) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%461 = torch.aten.add_.Tensor %460, %arg110, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%462 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%463 = torch.aten.view %461, %462 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%464 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%465 = torch.aten.permute %463, %464 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%466 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%467 = torch.aten.view %443, %466 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%468 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%469 = torch.aten.permute %467, %468 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%470 = torch.aten.transpose.int %454, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%471 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%472 = torch.aten.expand %469, %471, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%473 = torch.operator "aten.clone"(%472, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%474 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%475 = torch.operator "aten._unsafe_view"(%473, %474) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%476 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%477 = torch.aten.expand %470, %476, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%478 = torch.operator "aten.clone"(%477, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%479 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%480 = torch.operator "aten._unsafe_view"(%478, %479) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%481 = torch.aten.bmm %475, %480 : !torch.tensor, !torch.tensor -> !torch.tensor
%482 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%483 = torch.operator "aten._unsafe_view"(%481, %482) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%484 = torch.aten.div.Tensor %483, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%485 = torch.aten.add.Tensor %484, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%486 = torch.aten._softmax %485, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%487 = torch.aten.empty_like %486, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%488 = torch.operator "aten.bernoulli_.float"(%487, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%489 = torch.aten.div_.Scalar %488, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%490 = torch.aten.mul.Tensor %486, %489 : !torch.tensor, !torch.tensor -> !torch.tensor
%491 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%492 = torch.aten.expand %490, %491, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%493 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%494 = torch.aten.view %492, %493 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%495 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%496 = torch.aten.expand %465, %495, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%497 = torch.operator "aten.clone"(%496, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%498 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%499 = torch.operator "aten._unsafe_view"(%497, %498) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%500 = torch.aten.bmm %494, %499 : !torch.tensor, !torch.tensor -> !torch.tensor
%501 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%502 = torch.operator "aten._unsafe_view"(%500, %501) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%503 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%504 = torch.aten.permute %502, %503 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%505 = torch.operator "aten.clone"(%504, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%506 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%507 = torch.aten.view %505, %506 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%508 = torch.aten.t %arg105 : !torch.tensor -> !torch.tensor
%509 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%510 = torch.aten.view %507, %509 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%511 = torch.aten.mm %510, %508 : !torch.tensor, !torch.tensor -> !torch.tensor
%512 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%513 = torch.operator "aten._unsafe_view"(%511, %512) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%514 = torch.aten.add_.Tensor %513, %arg104, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%515 = torch.aten.empty_like %514, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%516 = torch.operator "aten.bernoulli_.float"(%515, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%517 = torch.aten.div_.Scalar %516, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%518 = torch.aten.mul.Tensor %514, %517 : !torch.tensor, !torch.tensor -> !torch.tensor
%519 = torch.aten.add.Tensor %518, %result0_21, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%520 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %519, %520, %arg103, %arg102, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%521 = torch.aten.t %arg113 : !torch.tensor -> !torch.tensor
%522 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%523 = torch.aten.view %result0_24, %522 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%524 = torch.aten.mm %523, %521 : !torch.tensor, !torch.tensor -> !torch.tensor
%525 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%526 = torch.operator "aten._unsafe_view"(%524, %525) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%527 = torch.aten.add_.Tensor %526, %arg112, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%528 = torch.aten.gelu %527 : !torch.tensor -> !torch.tensor
%529 = torch.aten.t %arg117 : !torch.tensor -> !torch.tensor
%530 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%531 = torch.aten.view %528, %530 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%532 = torch.aten.mm %531, %529 : !torch.tensor, !torch.tensor -> !torch.tensor
%533 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%534 = torch.operator "aten._unsafe_view"(%532, %533) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%535 = torch.aten.add_.Tensor %534, %arg116, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%536 = torch.aten.empty_like %535, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%537 = torch.operator "aten.bernoulli_.float"(%536, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%538 = torch.aten.div_.Scalar %537, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%539 = torch.aten.mul.Tensor %535, %538 : !torch.tensor, !torch.tensor -> !torch.tensor
%540 = torch.aten.add.Tensor %539, %result0_24, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%541 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_27, %result1_28, %result2_29 = torch.aten.native_layer_norm %540, %541, %arg115, %arg114, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%542 = torch.aten.t %arg125 : !torch.tensor -> !torch.tensor
%543 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%544 = torch.aten.view %result0_27, %543 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%545 = torch.aten.mm %544, %542 : !torch.tensor, !torch.tensor -> !torch.tensor
%546 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%547 = torch.operator "aten._unsafe_view"(%545, %546) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%548 = torch.aten.add_.Tensor %547, %arg124, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%549 = torch.aten.t %arg123 : !torch.tensor -> !torch.tensor
%550 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%551 = torch.aten.view %result0_27, %550 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%552 = torch.aten.mm %551, %549 : !torch.tensor, !torch.tensor -> !torch.tensor
%553 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%554 = torch.operator "aten._unsafe_view"(%552, %553) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%555 = torch.aten.add_.Tensor %554, %arg122, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%556 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%557 = torch.aten.view %555, %556 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%558 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%559 = torch.aten.permute %557, %558 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%560 = torch.aten.t %arg127 : !torch.tensor -> !torch.tensor
%561 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%562 = torch.aten.view %result0_27, %561 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%563 = torch.aten.mm %562, %560 : !torch.tensor, !torch.tensor -> !torch.tensor
%564 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%565 = torch.operator "aten._unsafe_view"(%563, %564) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%566 = torch.aten.add_.Tensor %565, %arg126, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%567 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%568 = torch.aten.view %566, %567 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%569 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%570 = torch.aten.permute %568, %569 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%571 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%572 = torch.aten.view %548, %571 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%573 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%574 = torch.aten.permute %572, %573 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%575 = torch.aten.transpose.int %559, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%576 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%577 = torch.aten.expand %574, %576, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%578 = torch.operator "aten.clone"(%577, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%579 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%580 = torch.operator "aten._unsafe_view"(%578, %579) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%581 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%582 = torch.aten.expand %575, %581, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%583 = torch.operator "aten.clone"(%582, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%584 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%585 = torch.operator "aten._unsafe_view"(%583, %584) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%586 = torch.aten.bmm %580, %585 : !torch.tensor, !torch.tensor -> !torch.tensor
%587 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%588 = torch.operator "aten._unsafe_view"(%586, %587) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%589 = torch.aten.div.Tensor %588, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%590 = torch.aten.add.Tensor %589, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%591 = torch.aten._softmax %590, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%592 = torch.aten.empty_like %591, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%593 = torch.operator "aten.bernoulli_.float"(%592, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%594 = torch.aten.div_.Scalar %593, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%595 = torch.aten.mul.Tensor %591, %594 : !torch.tensor, !torch.tensor -> !torch.tensor
%596 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%597 = torch.aten.expand %595, %596, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%598 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%599 = torch.aten.view %597, %598 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%600 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%601 = torch.aten.expand %570, %600, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%602 = torch.operator "aten.clone"(%601, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%603 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%604 = torch.operator "aten._unsafe_view"(%602, %603) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%605 = torch.aten.bmm %599, %604 : !torch.tensor, !torch.tensor -> !torch.tensor
%606 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%607 = torch.operator "aten._unsafe_view"(%605, %606) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%608 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%609 = torch.aten.permute %607, %608 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%610 = torch.operator "aten.clone"(%609, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%611 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%612 = torch.aten.view %610, %611 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%613 = torch.aten.t %arg121 : !torch.tensor -> !torch.tensor
%614 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%615 = torch.aten.view %612, %614 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%616 = torch.aten.mm %615, %613 : !torch.tensor, !torch.tensor -> !torch.tensor
%617 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%618 = torch.operator "aten._unsafe_view"(%616, %617) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%619 = torch.aten.add_.Tensor %618, %arg120, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%620 = torch.aten.empty_like %619, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%621 = torch.operator "aten.bernoulli_.float"(%620, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%622 = torch.aten.div_.Scalar %621, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%623 = torch.aten.mul.Tensor %619, %622 : !torch.tensor, !torch.tensor -> !torch.tensor
%624 = torch.aten.add.Tensor %623, %result0_27, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%625 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_30, %result1_31, %result2_32 = torch.aten.native_layer_norm %624, %625, %arg119, %arg118, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%626 = torch.aten.t %arg129 : !torch.tensor -> !torch.tensor
%627 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%628 = torch.aten.view %result0_30, %627 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%629 = torch.aten.mm %628, %626 : !torch.tensor, !torch.tensor -> !torch.tensor
%630 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%631 = torch.operator "aten._unsafe_view"(%629, %630) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%632 = torch.aten.add_.Tensor %631, %arg128, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%633 = torch.aten.gelu %632 : !torch.tensor -> !torch.tensor
%634 = torch.aten.t %arg133 : !torch.tensor -> !torch.tensor
%635 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%636 = torch.aten.view %633, %635 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%637 = torch.aten.mm %636, %634 : !torch.tensor, !torch.tensor -> !torch.tensor
%638 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%639 = torch.operator "aten._unsafe_view"(%637, %638) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%640 = torch.aten.add_.Tensor %639, %arg132, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%641 = torch.aten.empty_like %640, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%642 = torch.operator "aten.bernoulli_.float"(%641, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%643 = torch.aten.div_.Scalar %642, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%644 = torch.aten.mul.Tensor %640, %643 : !torch.tensor, !torch.tensor -> !torch.tensor
%645 = torch.aten.add.Tensor %644, %result0_30, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%646 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_33, %result1_34, %result2_35 = torch.aten.native_layer_norm %645, %646, %arg131, %arg130, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%647 = torch.aten.t %arg141 : !torch.tensor -> !torch.tensor
%648 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%649 = torch.aten.view %result0_33, %648 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%650 = torch.aten.mm %649, %647 : !torch.tensor, !torch.tensor -> !torch.tensor
%651 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%652 = torch.operator "aten._unsafe_view"(%650, %651) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%653 = torch.aten.add_.Tensor %652, %arg140, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%654 = torch.aten.t %arg139 : !torch.tensor -> !torch.tensor
%655 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%656 = torch.aten.view %result0_33, %655 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%657 = torch.aten.mm %656, %654 : !torch.tensor, !torch.tensor -> !torch.tensor
%658 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%659 = torch.operator "aten._unsafe_view"(%657, %658) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%660 = torch.aten.add_.Tensor %659, %arg138, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%661 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%662 = torch.aten.view %660, %661 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%663 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%664 = torch.aten.permute %662, %663 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%665 = torch.aten.t %arg143 : !torch.tensor -> !torch.tensor
%666 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%667 = torch.aten.view %result0_33, %666 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%668 = torch.aten.mm %667, %665 : !torch.tensor, !torch.tensor -> !torch.tensor
%669 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%670 = torch.operator "aten._unsafe_view"(%668, %669) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%671 = torch.aten.add_.Tensor %670, %arg142, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%672 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%673 = torch.aten.view %671, %672 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%674 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%675 = torch.aten.permute %673, %674 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%676 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%677 = torch.aten.view %653, %676 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%678 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%679 = torch.aten.permute %677, %678 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%680 = torch.aten.transpose.int %664, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%681 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%682 = torch.aten.expand %679, %681, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%683 = torch.operator "aten.clone"(%682, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%684 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%685 = torch.operator "aten._unsafe_view"(%683, %684) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%686 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%687 = torch.aten.expand %680, %686, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%688 = torch.operator "aten.clone"(%687, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%689 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%690 = torch.operator "aten._unsafe_view"(%688, %689) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%691 = torch.aten.bmm %685, %690 : !torch.tensor, !torch.tensor -> !torch.tensor
%692 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%693 = torch.operator "aten._unsafe_view"(%691, %692) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%694 = torch.aten.div.Tensor %693, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%695 = torch.aten.add.Tensor %694, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%696 = torch.aten._softmax %695, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%697 = torch.aten.empty_like %696, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%698 = torch.operator "aten.bernoulli_.float"(%697, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%699 = torch.aten.div_.Scalar %698, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%700 = torch.aten.mul.Tensor %696, %699 : !torch.tensor, !torch.tensor -> !torch.tensor
%701 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%702 = torch.aten.expand %700, %701, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%703 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%704 = torch.aten.view %702, %703 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%705 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%706 = torch.aten.expand %675, %705, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%707 = torch.operator "aten.clone"(%706, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%708 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%709 = torch.operator "aten._unsafe_view"(%707, %708) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%710 = torch.aten.bmm %704, %709 : !torch.tensor, !torch.tensor -> !torch.tensor
%711 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%712 = torch.operator "aten._unsafe_view"(%710, %711) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%713 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%714 = torch.aten.permute %712, %713 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%715 = torch.operator "aten.clone"(%714, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%716 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%717 = torch.aten.view %715, %716 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%718 = torch.aten.t %arg137 : !torch.tensor -> !torch.tensor
%719 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%720 = torch.aten.view %717, %719 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%721 = torch.aten.mm %720, %718 : !torch.tensor, !torch.tensor -> !torch.tensor
%722 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%723 = torch.operator "aten._unsafe_view"(%721, %722) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%724 = torch.aten.add_.Tensor %723, %arg136, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%725 = torch.aten.empty_like %724, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%726 = torch.operator "aten.bernoulli_.float"(%725, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%727 = torch.aten.div_.Scalar %726, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%728 = torch.aten.mul.Tensor %724, %727 : !torch.tensor, !torch.tensor -> !torch.tensor
%729 = torch.aten.add.Tensor %728, %result0_33, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%730 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_36, %result1_37, %result2_38 = torch.aten.native_layer_norm %729, %730, %arg135, %arg134, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%731 = torch.aten.t %arg145 : !torch.tensor -> !torch.tensor
%732 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%733 = torch.aten.view %result0_36, %732 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%734 = torch.aten.mm %733, %731 : !torch.tensor, !torch.tensor -> !torch.tensor
%735 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%736 = torch.operator "aten._unsafe_view"(%734, %735) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%737 = torch.aten.add_.Tensor %736, %arg144, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%738 = torch.aten.gelu %737 : !torch.tensor -> !torch.tensor
%739 = torch.aten.t %arg149 : !torch.tensor -> !torch.tensor
%740 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%741 = torch.aten.view %738, %740 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%742 = torch.aten.mm %741, %739 : !torch.tensor, !torch.tensor -> !torch.tensor
%743 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%744 = torch.operator "aten._unsafe_view"(%742, %743) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%745 = torch.aten.add_.Tensor %744, %arg148, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%746 = torch.aten.empty_like %745, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%747 = torch.operator "aten.bernoulli_.float"(%746, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%748 = torch.aten.div_.Scalar %747, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%749 = torch.aten.mul.Tensor %745, %748 : !torch.tensor, !torch.tensor -> !torch.tensor
%750 = torch.aten.add.Tensor %749, %result0_36, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%751 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_39, %result1_40, %result2_41 = torch.aten.native_layer_norm %750, %751, %arg147, %arg146, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%752 = torch.aten.t %arg157 : !torch.tensor -> !torch.tensor
%753 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%754 = torch.aten.view %result0_39, %753 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%755 = torch.aten.mm %754, %752 : !torch.tensor, !torch.tensor -> !torch.tensor
%756 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%757 = torch.operator "aten._unsafe_view"(%755, %756) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%758 = torch.aten.add_.Tensor %757, %arg156, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%759 = torch.aten.t %arg155 : !torch.tensor -> !torch.tensor
%760 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%761 = torch.aten.view %result0_39, %760 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%762 = torch.aten.mm %761, %759 : !torch.tensor, !torch.tensor -> !torch.tensor
%763 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%764 = torch.operator "aten._unsafe_view"(%762, %763) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%765 = torch.aten.add_.Tensor %764, %arg154, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%766 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%767 = torch.aten.view %765, %766 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%768 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%769 = torch.aten.permute %767, %768 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%770 = torch.aten.t %arg159 : !torch.tensor -> !torch.tensor
%771 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%772 = torch.aten.view %result0_39, %771 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%773 = torch.aten.mm %772, %770 : !torch.tensor, !torch.tensor -> !torch.tensor
%774 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%775 = torch.operator "aten._unsafe_view"(%773, %774) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%776 = torch.aten.add_.Tensor %775, %arg158, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%777 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%778 = torch.aten.view %776, %777 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%779 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%780 = torch.aten.permute %778, %779 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%781 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%782 = torch.aten.view %758, %781 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%783 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%784 = torch.aten.permute %782, %783 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%785 = torch.aten.transpose.int %769, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%786 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%787 = torch.aten.expand %784, %786, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%788 = torch.operator "aten.clone"(%787, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%789 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%790 = torch.operator "aten._unsafe_view"(%788, %789) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%791 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%792 = torch.aten.expand %785, %791, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%793 = torch.operator "aten.clone"(%792, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%794 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%795 = torch.operator "aten._unsafe_view"(%793, %794) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%796 = torch.aten.bmm %790, %795 : !torch.tensor, !torch.tensor -> !torch.tensor
%797 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%798 = torch.operator "aten._unsafe_view"(%796, %797) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%799 = torch.aten.div.Tensor %798, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%800 = torch.aten.add.Tensor %799, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%801 = torch.aten._softmax %800, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%802 = torch.aten.empty_like %801, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%803 = torch.operator "aten.bernoulli_.float"(%802, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%804 = torch.aten.div_.Scalar %803, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%805 = torch.aten.mul.Tensor %801, %804 : !torch.tensor, !torch.tensor -> !torch.tensor
%806 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%807 = torch.aten.expand %805, %806, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%808 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%809 = torch.aten.view %807, %808 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%810 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%811 = torch.aten.expand %780, %810, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%812 = torch.operator "aten.clone"(%811, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%813 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%814 = torch.operator "aten._unsafe_view"(%812, %813) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%815 = torch.aten.bmm %809, %814 : !torch.tensor, !torch.tensor -> !torch.tensor
%816 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%817 = torch.operator "aten._unsafe_view"(%815, %816) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%818 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%819 = torch.aten.permute %817, %818 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%820 = torch.operator "aten.clone"(%819, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%821 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%822 = torch.aten.view %820, %821 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%823 = torch.aten.t %arg153 : !torch.tensor -> !torch.tensor
%824 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%825 = torch.aten.view %822, %824 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%826 = torch.aten.mm %825, %823 : !torch.tensor, !torch.tensor -> !torch.tensor
%827 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%828 = torch.operator "aten._unsafe_view"(%826, %827) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%829 = torch.aten.add_.Tensor %828, %arg152, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%830 = torch.aten.empty_like %829, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%831 = torch.operator "aten.bernoulli_.float"(%830, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%832 = torch.aten.div_.Scalar %831, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%833 = torch.aten.mul.Tensor %829, %832 : !torch.tensor, !torch.tensor -> !torch.tensor
%834 = torch.aten.add.Tensor %833, %result0_39, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%835 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_42, %result1_43, %result2_44 = torch.aten.native_layer_norm %834, %835, %arg151, %arg150, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%836 = torch.aten.t %arg161 : !torch.tensor -> !torch.tensor
%837 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%838 = torch.aten.view %result0_42, %837 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%839 = torch.aten.mm %838, %836 : !torch.tensor, !torch.tensor -> !torch.tensor
%840 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%841 = torch.operator "aten._unsafe_view"(%839, %840) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%842 = torch.aten.add_.Tensor %841, %arg160, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%843 = torch.aten.gelu %842 : !torch.tensor -> !torch.tensor
%844 = torch.aten.t %arg165 : !torch.tensor -> !torch.tensor
%845 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%846 = torch.aten.view %843, %845 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%847 = torch.aten.mm %846, %844 : !torch.tensor, !torch.tensor -> !torch.tensor
%848 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%849 = torch.operator "aten._unsafe_view"(%847, %848) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%850 = torch.aten.add_.Tensor %849, %arg164, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%851 = torch.aten.empty_like %850, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%852 = torch.operator "aten.bernoulli_.float"(%851, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%853 = torch.aten.div_.Scalar %852, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%854 = torch.aten.mul.Tensor %850, %853 : !torch.tensor, !torch.tensor -> !torch.tensor
%855 = torch.aten.add.Tensor %854, %result0_42, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%856 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_45, %result1_46, %result2_47 = torch.aten.native_layer_norm %855, %856, %arg163, %arg162, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%857 = torch.aten.t %arg173 : !torch.tensor -> !torch.tensor
%858 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%859 = torch.aten.view %result0_45, %858 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%860 = torch.aten.mm %859, %857 : !torch.tensor, !torch.tensor -> !torch.tensor
%861 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%862 = torch.operator "aten._unsafe_view"(%860, %861) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%863 = torch.aten.add_.Tensor %862, %arg172, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%864 = torch.aten.t %arg171 : !torch.tensor -> !torch.tensor
%865 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%866 = torch.aten.view %result0_45, %865 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%867 = torch.aten.mm %866, %864 : !torch.tensor, !torch.tensor -> !torch.tensor
%868 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%869 = torch.operator "aten._unsafe_view"(%867, %868) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%870 = torch.aten.add_.Tensor %869, %arg170, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%871 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%872 = torch.aten.view %870, %871 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%873 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%874 = torch.aten.permute %872, %873 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%875 = torch.aten.t %arg175 : !torch.tensor -> !torch.tensor
%876 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%877 = torch.aten.view %result0_45, %876 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%878 = torch.aten.mm %877, %875 : !torch.tensor, !torch.tensor -> !torch.tensor
%879 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%880 = torch.operator "aten._unsafe_view"(%878, %879) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%881 = torch.aten.add_.Tensor %880, %arg174, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%882 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%883 = torch.aten.view %881, %882 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%884 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%885 = torch.aten.permute %883, %884 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%886 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%887 = torch.aten.view %863, %886 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%888 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%889 = torch.aten.permute %887, %888 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%890 = torch.aten.transpose.int %874, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%891 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%892 = torch.aten.expand %889, %891, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%893 = torch.operator "aten.clone"(%892, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%894 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%895 = torch.operator "aten._unsafe_view"(%893, %894) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%896 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%897 = torch.aten.expand %890, %896, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%898 = torch.operator "aten.clone"(%897, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%899 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%900 = torch.operator "aten._unsafe_view"(%898, %899) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%901 = torch.aten.bmm %895, %900 : !torch.tensor, !torch.tensor -> !torch.tensor
%902 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%903 = torch.operator "aten._unsafe_view"(%901, %902) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%904 = torch.aten.div.Tensor %903, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%905 = torch.aten.add.Tensor %904, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%906 = torch.aten._softmax %905, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%907 = torch.aten.empty_like %906, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%908 = torch.operator "aten.bernoulli_.float"(%907, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%909 = torch.aten.div_.Scalar %908, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%910 = torch.aten.mul.Tensor %906, %909 : !torch.tensor, !torch.tensor -> !torch.tensor
%911 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%912 = torch.aten.expand %910, %911, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%913 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%914 = torch.aten.view %912, %913 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%915 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%916 = torch.aten.expand %885, %915, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%917 = torch.operator "aten.clone"(%916, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%918 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%919 = torch.operator "aten._unsafe_view"(%917, %918) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%920 = torch.aten.bmm %914, %919 : !torch.tensor, !torch.tensor -> !torch.tensor
%921 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%922 = torch.operator "aten._unsafe_view"(%920, %921) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%923 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%924 = torch.aten.permute %922, %923 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%925 = torch.operator "aten.clone"(%924, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%926 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%927 = torch.aten.view %925, %926 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%928 = torch.aten.t %arg169 : !torch.tensor -> !torch.tensor
%929 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%930 = torch.aten.view %927, %929 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%931 = torch.aten.mm %930, %928 : !torch.tensor, !torch.tensor -> !torch.tensor
%932 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%933 = torch.operator "aten._unsafe_view"(%931, %932) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%934 = torch.aten.add_.Tensor %933, %arg168, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%935 = torch.aten.empty_like %934, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%936 = torch.operator "aten.bernoulli_.float"(%935, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%937 = torch.aten.div_.Scalar %936, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%938 = torch.aten.mul.Tensor %934, %937 : !torch.tensor, !torch.tensor -> !torch.tensor
%939 = torch.aten.add.Tensor %938, %result0_45, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%940 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %939, %940, %arg167, %arg166, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%941 = torch.aten.t %arg177 : !torch.tensor -> !torch.tensor
%942 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%943 = torch.aten.view %result0_48, %942 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%944 = torch.aten.mm %943, %941 : !torch.tensor, !torch.tensor -> !torch.tensor
%945 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%946 = torch.operator "aten._unsafe_view"(%944, %945) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%947 = torch.aten.add_.Tensor %946, %arg176, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%948 = torch.aten.gelu %947 : !torch.tensor -> !torch.tensor
%949 = torch.aten.t %arg181 : !torch.tensor -> !torch.tensor
%950 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%951 = torch.aten.view %948, %950 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%952 = torch.aten.mm %951, %949 : !torch.tensor, !torch.tensor -> !torch.tensor
%953 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%954 = torch.operator "aten._unsafe_view"(%952, %953) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%955 = torch.aten.add_.Tensor %954, %arg180, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%956 = torch.aten.empty_like %955, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%957 = torch.operator "aten.bernoulli_.float"(%956, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%958 = torch.aten.div_.Scalar %957, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%959 = torch.aten.mul.Tensor %955, %958 : !torch.tensor, !torch.tensor -> !torch.tensor
%960 = torch.aten.add.Tensor %959, %result0_48, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%961 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %960, %961, %arg179, %arg178, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%962 = torch.aten.t %arg189 : !torch.tensor -> !torch.tensor
%963 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%964 = torch.aten.view %result0_51, %963 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%965 = torch.aten.mm %964, %962 : !torch.tensor, !torch.tensor -> !torch.tensor
%966 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%967 = torch.operator "aten._unsafe_view"(%965, %966) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%968 = torch.aten.add_.Tensor %967, %arg188, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%969 = torch.aten.t %arg187 : !torch.tensor -> !torch.tensor
%970 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%971 = torch.aten.view %result0_51, %970 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%972 = torch.aten.mm %971, %969 : !torch.tensor, !torch.tensor -> !torch.tensor
%973 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%974 = torch.operator "aten._unsafe_view"(%972, %973) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%975 = torch.aten.add_.Tensor %974, %arg186, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%976 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%977 = torch.aten.view %975, %976 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%978 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%979 = torch.aten.permute %977, %978 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%980 = torch.aten.t %arg191 : !torch.tensor -> !torch.tensor
%981 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%982 = torch.aten.view %result0_51, %981 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%983 = torch.aten.mm %982, %980 : !torch.tensor, !torch.tensor -> !torch.tensor
%984 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%985 = torch.operator "aten._unsafe_view"(%983, %984) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%986 = torch.aten.add_.Tensor %985, %arg190, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%987 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%988 = torch.aten.view %986, %987 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%989 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%990 = torch.aten.permute %988, %989 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%991 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%992 = torch.aten.view %968, %991 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%993 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%994 = torch.aten.permute %992, %993 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%995 = torch.aten.transpose.int %979, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%996 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%997 = torch.aten.expand %994, %996, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%998 = torch.operator "aten.clone"(%997, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%999 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1000 = torch.operator "aten._unsafe_view"(%998, %999) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1001 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1002 = torch.aten.expand %995, %1001, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1003 = torch.operator "aten.clone"(%1002, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1004 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1005 = torch.operator "aten._unsafe_view"(%1003, %1004) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1006 = torch.aten.bmm %1000, %1005 : !torch.tensor, !torch.tensor -> !torch.tensor
%1007 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1008 = torch.operator "aten._unsafe_view"(%1006, %1007) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1009 = torch.aten.div.Tensor %1008, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1010 = torch.aten.add.Tensor %1009, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1011 = torch.aten._softmax %1010, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1012 = torch.aten.empty_like %1011, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1013 = torch.operator "aten.bernoulli_.float"(%1012, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1014 = torch.aten.div_.Scalar %1013, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1015 = torch.aten.mul.Tensor %1011, %1014 : !torch.tensor, !torch.tensor -> !torch.tensor
%1016 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1017 = torch.aten.expand %1015, %1016, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1018 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1019 = torch.aten.view %1017, %1018 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1020 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1021 = torch.aten.expand %990, %1020, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1022 = torch.operator "aten.clone"(%1021, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1023 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1024 = torch.operator "aten._unsafe_view"(%1022, %1023) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1025 = torch.aten.bmm %1019, %1024 : !torch.tensor, !torch.tensor -> !torch.tensor
%1026 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1027 = torch.operator "aten._unsafe_view"(%1025, %1026) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1028 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1029 = torch.aten.permute %1027, %1028 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1030 = torch.operator "aten.clone"(%1029, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1031 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1032 = torch.aten.view %1030, %1031 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1033 = torch.aten.t %arg185 : !torch.tensor -> !torch.tensor
%1034 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1035 = torch.aten.view %1032, %1034 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1036 = torch.aten.mm %1035, %1033 : !torch.tensor, !torch.tensor -> !torch.tensor
%1037 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1038 = torch.operator "aten._unsafe_view"(%1036, %1037) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1039 = torch.aten.add_.Tensor %1038, %arg184, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1040 = torch.aten.empty_like %1039, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1041 = torch.operator "aten.bernoulli_.float"(%1040, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1042 = torch.aten.div_.Scalar %1041, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1043 = torch.aten.mul.Tensor %1039, %1042 : !torch.tensor, !torch.tensor -> !torch.tensor
%1044 = torch.aten.add.Tensor %1043, %result0_51, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1045 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_54, %result1_55, %result2_56 = torch.aten.native_layer_norm %1044, %1045, %arg183, %arg182, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1046 = torch.aten.t %arg193 : !torch.tensor -> !torch.tensor
%1047 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1048 = torch.aten.view %result0_54, %1047 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1049 = torch.aten.mm %1048, %1046 : !torch.tensor, !torch.tensor -> !torch.tensor
%1050 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1051 = torch.operator "aten._unsafe_view"(%1049, %1050) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1052 = torch.aten.add_.Tensor %1051, %arg192, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1053 = torch.aten.gelu %1052 : !torch.tensor -> !torch.tensor
%1054 = torch.aten.t %arg197 : !torch.tensor -> !torch.tensor
%1055 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1056 = torch.aten.view %1053, %1055 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1057 = torch.aten.mm %1056, %1054 : !torch.tensor, !torch.tensor -> !torch.tensor
%1058 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1059 = torch.operator "aten._unsafe_view"(%1057, %1058) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1060 = torch.aten.add_.Tensor %1059, %arg196, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1061 = torch.aten.empty_like %1060, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1062 = torch.operator "aten.bernoulli_.float"(%1061, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1063 = torch.aten.div_.Scalar %1062, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1064 = torch.aten.mul.Tensor %1060, %1063 : !torch.tensor, !torch.tensor -> !torch.tensor
%1065 = torch.aten.add.Tensor %1064, %result0_54, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1066 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_57, %result1_58, %result2_59 = torch.aten.native_layer_norm %1065, %1066, %arg195, %arg194, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1067 = torch.aten.t %arg45 : !torch.tensor -> !torch.tensor
%1068 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1069 = torch.aten.view %result0_57, %1068 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1070 = torch.aten.mm %1069, %1067 : !torch.tensor, !torch.tensor -> !torch.tensor
%1071 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1072 = torch.operator "aten._unsafe_view"(%1070, %1071) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1073 = torch.aten.add_.Tensor %1072, %arg44, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1074 = torch.aten.t %arg43 : !torch.tensor -> !torch.tensor
%1075 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1076 = torch.aten.view %result0_57, %1075 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1077 = torch.aten.mm %1076, %1074 : !torch.tensor, !torch.tensor -> !torch.tensor
%1078 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1079 = torch.operator "aten._unsafe_view"(%1077, %1078) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1080 = torch.aten.add_.Tensor %1079, %arg42, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1081 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1082 = torch.aten.view %1080, %1081 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1083 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1084 = torch.aten.permute %1082, %1083 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1085 = torch.aten.t %arg47 : !torch.tensor -> !torch.tensor
%1086 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1087 = torch.aten.view %result0_57, %1086 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1088 = torch.aten.mm %1087, %1085 : !torch.tensor, !torch.tensor -> !torch.tensor
%1089 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1090 = torch.operator "aten._unsafe_view"(%1088, %1089) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1091 = torch.aten.add_.Tensor %1090, %arg46, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1092 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1093 = torch.aten.view %1091, %1092 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1094 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1095 = torch.aten.permute %1093, %1094 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1096 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1097 = torch.aten.view %1073, %1096 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1098 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1099 = torch.aten.permute %1097, %1098 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1100 = torch.aten.transpose.int %1084, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1101 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1102 = torch.aten.expand %1099, %1101, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1103 = torch.operator "aten.clone"(%1102, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1104 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1105 = torch.operator "aten._unsafe_view"(%1103, %1104) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1106 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1107 = torch.aten.expand %1100, %1106, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1108 = torch.operator "aten.clone"(%1107, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1109 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1110 = torch.operator "aten._unsafe_view"(%1108, %1109) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1111 = torch.aten.bmm %1105, %1110 : !torch.tensor, !torch.tensor -> !torch.tensor
%1112 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1113 = torch.operator "aten._unsafe_view"(%1111, %1112) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1114 = torch.aten.div.Tensor %1113, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1115 = torch.aten.add.Tensor %1114, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1116 = torch.aten._softmax %1115, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1117 = torch.aten.empty_like %1116, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1118 = torch.operator "aten.bernoulli_.float"(%1117, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1119 = torch.aten.div_.Scalar %1118, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1120 = torch.aten.mul.Tensor %1116, %1119 : !torch.tensor, !torch.tensor -> !torch.tensor
%1121 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1122 = torch.aten.expand %1120, %1121, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1123 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1124 = torch.aten.view %1122, %1123 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1125 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1126 = torch.aten.expand %1095, %1125, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1127 = torch.operator "aten.clone"(%1126, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1128 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1129 = torch.operator "aten._unsafe_view"(%1127, %1128) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1130 = torch.aten.bmm %1124, %1129 : !torch.tensor, !torch.tensor -> !torch.tensor
%1131 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1132 = torch.operator "aten._unsafe_view"(%1130, %1131) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1133 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1134 = torch.aten.permute %1132, %1133 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1135 = torch.operator "aten.clone"(%1134, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1136 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1137 = torch.aten.view %1135, %1136 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1138 = torch.aten.t %arg41 : !torch.tensor -> !torch.tensor
%1139 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1140 = torch.aten.view %1137, %1139 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1141 = torch.aten.mm %1140, %1138 : !torch.tensor, !torch.tensor -> !torch.tensor
%1142 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1143 = torch.operator "aten._unsafe_view"(%1141, %1142) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1144 = torch.aten.add_.Tensor %1143, %arg40, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1145 = torch.aten.empty_like %1144, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1146 = torch.operator "aten.bernoulli_.float"(%1145, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1147 = torch.aten.div_.Scalar %1146, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1148 = torch.aten.mul.Tensor %1144, %1147 : !torch.tensor, !torch.tensor -> !torch.tensor
%1149 = torch.aten.add.Tensor %1148, %result0_57, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1150 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_60, %result1_61, %result2_62 = torch.aten.native_layer_norm %1149, %1150, %arg39, %arg38, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1151 = torch.aten.t %arg49 : !torch.tensor -> !torch.tensor
%1152 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1153 = torch.aten.view %result0_60, %1152 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1154 = torch.aten.mm %1153, %1151 : !torch.tensor, !torch.tensor -> !torch.tensor
%1155 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1156 = torch.operator "aten._unsafe_view"(%1154, %1155) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1157 = torch.aten.add_.Tensor %1156, %arg48, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1158 = torch.aten.gelu %1157 : !torch.tensor -> !torch.tensor
%1159 = torch.aten.t %arg53 : !torch.tensor -> !torch.tensor
%1160 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1161 = torch.aten.view %1158, %1160 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1162 = torch.aten.mm %1161, %1159 : !torch.tensor, !torch.tensor -> !torch.tensor
%1163 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1164 = torch.operator "aten._unsafe_view"(%1162, %1163) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1165 = torch.aten.add_.Tensor %1164, %arg52, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1166 = torch.aten.empty_like %1165, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1167 = torch.operator "aten.bernoulli_.float"(%1166, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1168 = torch.aten.div_.Scalar %1167, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1169 = torch.aten.mul.Tensor %1165, %1168 : !torch.tensor, !torch.tensor -> !torch.tensor
%1170 = torch.aten.add.Tensor %1169, %result0_60, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1171 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_63, %result1_64, %result2_65 = torch.aten.native_layer_norm %1170, %1171, %arg51, %arg50, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1172 = torch.aten.t %arg61 : !torch.tensor -> !torch.tensor
%1173 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1174 = torch.aten.view %result0_63, %1173 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1175 = torch.aten.mm %1174, %1172 : !torch.tensor, !torch.tensor -> !torch.tensor
%1176 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1177 = torch.operator "aten._unsafe_view"(%1175, %1176) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1178 = torch.aten.add_.Tensor %1177, %arg60, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1179 = torch.aten.t %arg59 : !torch.tensor -> !torch.tensor
%1180 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1181 = torch.aten.view %result0_63, %1180 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1182 = torch.aten.mm %1181, %1179 : !torch.tensor, !torch.tensor -> !torch.tensor
%1183 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1184 = torch.operator "aten._unsafe_view"(%1182, %1183) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1185 = torch.aten.add_.Tensor %1184, %arg58, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1186 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1187 = torch.aten.view %1185, %1186 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1188 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1189 = torch.aten.permute %1187, %1188 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1190 = torch.aten.t %arg63 : !torch.tensor -> !torch.tensor
%1191 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1192 = torch.aten.view %result0_63, %1191 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1193 = torch.aten.mm %1192, %1190 : !torch.tensor, !torch.tensor -> !torch.tensor
%1194 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1195 = torch.operator "aten._unsafe_view"(%1193, %1194) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1196 = torch.aten.add_.Tensor %1195, %arg62, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1197 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1198 = torch.aten.view %1196, %1197 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1199 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1200 = torch.aten.permute %1198, %1199 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1201 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1202 = torch.aten.view %1178, %1201 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1203 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1204 = torch.aten.permute %1202, %1203 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1205 = torch.aten.transpose.int %1189, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1206 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1207 = torch.aten.expand %1204, %1206, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1208 = torch.operator "aten.clone"(%1207, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1209 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1210 = torch.operator "aten._unsafe_view"(%1208, %1209) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1211 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1212 = torch.aten.expand %1205, %1211, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1213 = torch.operator "aten.clone"(%1212, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1214 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1215 = torch.operator "aten._unsafe_view"(%1213, %1214) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1216 = torch.aten.bmm %1210, %1215 : !torch.tensor, !torch.tensor -> !torch.tensor
%1217 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1218 = torch.operator "aten._unsafe_view"(%1216, %1217) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1219 = torch.aten.div.Tensor %1218, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1220 = torch.aten.add.Tensor %1219, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1221 = torch.aten._softmax %1220, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1222 = torch.aten.empty_like %1221, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1223 = torch.operator "aten.bernoulli_.float"(%1222, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1224 = torch.aten.div_.Scalar %1223, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1225 = torch.aten.mul.Tensor %1221, %1224 : !torch.tensor, !torch.tensor -> !torch.tensor
%1226 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1227 = torch.aten.expand %1225, %1226, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1228 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1229 = torch.aten.view %1227, %1228 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1230 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1231 = torch.aten.expand %1200, %1230, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1232 = torch.operator "aten.clone"(%1231, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1233 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1234 = torch.operator "aten._unsafe_view"(%1232, %1233) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1235 = torch.aten.bmm %1229, %1234 : !torch.tensor, !torch.tensor -> !torch.tensor
%1236 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1237 = torch.operator "aten._unsafe_view"(%1235, %1236) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1238 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1239 = torch.aten.permute %1237, %1238 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1240 = torch.operator "aten.clone"(%1239, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1241 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1242 = torch.aten.view %1240, %1241 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1243 = torch.aten.t %arg57 : !torch.tensor -> !torch.tensor
%1244 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1245 = torch.aten.view %1242, %1244 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1246 = torch.aten.mm %1245, %1243 : !torch.tensor, !torch.tensor -> !torch.tensor
%1247 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1248 = torch.operator "aten._unsafe_view"(%1246, %1247) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1249 = torch.aten.add_.Tensor %1248, %arg56, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1250 = torch.aten.empty_like %1249, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1251 = torch.operator "aten.bernoulli_.float"(%1250, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1252 = torch.aten.div_.Scalar %1251, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1253 = torch.aten.mul.Tensor %1249, %1252 : !torch.tensor, !torch.tensor -> !torch.tensor
%1254 = torch.aten.add.Tensor %1253, %result0_63, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1255 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_66, %result1_67, %result2_68 = torch.aten.native_layer_norm %1254, %1255, %arg55, %arg54, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1256 = torch.aten.t %arg65 : !torch.tensor -> !torch.tensor
%1257 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1258 = torch.aten.view %result0_66, %1257 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1259 = torch.aten.mm %1258, %1256 : !torch.tensor, !torch.tensor -> !torch.tensor
%1260 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1261 = torch.operator "aten._unsafe_view"(%1259, %1260) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1262 = torch.aten.add_.Tensor %1261, %arg64, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1263 = torch.aten.gelu %1262 : !torch.tensor -> !torch.tensor
%1264 = torch.aten.t %arg69 : !torch.tensor -> !torch.tensor
%1265 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1266 = torch.aten.view %1263, %1265 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1267 = torch.aten.mm %1266, %1264 : !torch.tensor, !torch.tensor -> !torch.tensor
%1268 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1269 = torch.operator "aten._unsafe_view"(%1267, %1268) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1270 = torch.aten.add_.Tensor %1269, %arg68, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1271 = torch.aten.empty_like %1270, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1272 = torch.operator "aten.bernoulli_.float"(%1271, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1273 = torch.aten.div_.Scalar %1272, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1274 = torch.aten.mul.Tensor %1270, %1273 : !torch.tensor, !torch.tensor -> !torch.tensor
%1275 = torch.aten.add.Tensor %1274, %result0_66, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1276 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_69, %result1_70, %result2_71 = torch.aten.native_layer_norm %1275, %1276, %arg67, %arg66, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1277 = torch.aten.t %arg204 : !torch.tensor -> !torch.tensor
%1278 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1279 = torch.aten.view %result0_69, %1278 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1280 = torch.aten.mm %1279, %1277 : !torch.tensor, !torch.tensor -> !torch.tensor
%1281 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1282 = torch.operator "aten._unsafe_view"(%1280, %1281) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1283 = torch.aten.add_.Tensor %1282, %arg203, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1284 = torch.aten.gelu %1283 : !torch.tensor -> !torch.tensor
%1285 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_72, %result1_73, %result2_74 = torch.aten.native_layer_norm %1284, %1285, %arg202, %arg201, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1286 = torch.aten.t %arg200 : !torch.tensor -> !torch.tensor
%1287 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1288 = torch.aten.view %result0_72, %1287 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1289 = torch.aten.mm %1288, %1286 : !torch.tensor, !torch.tensor -> !torch.tensor
%1290 = torch.prim.ListConstruct %int4, %int512, %int30522 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1291 = torch.operator "aten._unsafe_view"(%1289, %1290) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1292 = torch.aten.add_.Tensor %1291, %arg199, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1293 = torch.prim.ListConstruct %int-1, %int30522 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1294 = torch.aten.view %1292, %1293 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1295 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<!torch.int>
%1296 = torch.aten.view %arg208, %1295 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1297 = torch.operator "aten._log_softmax"(%1294, %int1, %false) : (!torch.tensor, !torch.int, !torch.bool) -> !torch.tensor
%output, %total_weight = torch.aten.nll_loss_forward %1297, %1296, %none, %int1, %int-100 : !torch.tensor, !torch.tensor, !torch.none, !torch.int, !torch.int -> !torch.tensor, !torch.tensor
%1298 = torch.aten.transpose.int %1229, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1299 = torch.aten.transpose.int %1234, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1300 = torch.aten.transpose.int %1210, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1301 = torch.aten.transpose.int %1215, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1302 = torch.aten.transpose.int %1124, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1303 = torch.aten.transpose.int %1129, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1304 = torch.aten.transpose.int %1105, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1305 = torch.aten.transpose.int %1110, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1306 = torch.aten.transpose.int %1019, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1307 = torch.aten.transpose.int %1024, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1308 = torch.aten.transpose.int %1000, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1309 = torch.aten.transpose.int %1005, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1310 = torch.aten.transpose.int %914, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1311 = torch.aten.transpose.int %919, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1312 = torch.aten.transpose.int %895, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1313 = torch.aten.transpose.int %900, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1314 = torch.aten.transpose.int %809, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1315 = torch.aten.transpose.int %814, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1316 = torch.aten.transpose.int %790, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1317 = torch.aten.transpose.int %795, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1318 = torch.aten.transpose.int %704, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1319 = torch.aten.transpose.int %709, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1320 = torch.aten.transpose.int %685, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1321 = torch.aten.transpose.int %690, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1322 = torch.aten.transpose.int %599, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1323 = torch.aten.transpose.int %604, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1324 = torch.aten.transpose.int %580, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1325 = torch.aten.transpose.int %585, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1326 = torch.aten.transpose.int %494, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1327 = torch.aten.transpose.int %499, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1328 = torch.aten.transpose.int %475, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1329 = torch.aten.transpose.int %480, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1330 = torch.aten.transpose.int %389, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1331 = torch.aten.transpose.int %394, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1332 = torch.aten.transpose.int %370, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1333 = torch.aten.transpose.int %375, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1334 = torch.aten.transpose.int %284, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1335 = torch.aten.transpose.int %289, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1336 = torch.aten.transpose.int %265, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1337 = torch.aten.transpose.int %270, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1338 = torch.aten.transpose.int %179, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1339 = torch.aten.transpose.int %184, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1340 = torch.aten.transpose.int %160, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1341 = torch.aten.transpose.int %165, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1342 = torch.aten.transpose.int %74, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1343 = torch.aten.transpose.int %79, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1344 = torch.aten.transpose.int %55, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1345 = torch.aten.transpose.int %60, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1346 = torch.prim.ListConstruct %output, %1292, %307, %result2_20, %arg99, %result2_50, %1014, %1275, %arg37, %result1_16, %825, %591, %1076, %arg121, %arg38, %446, %result2_5, %arg162, %arg27, %1338, %1245, %result2_11, %result1_7, %arg101, %1224, %arg77, %arg137, %124, %arg95, %1297, %510, %result1_10, %arg117, %321, %594, %733, %arg178, %arg17, %540, %arg134, %arg50, %964, %result1_52, %arg71, %720, %arg41, %arg34, %arg175, %result2_2, %arg61, %arg54, %5, %1116, %204, %arg119, %arg66, %arg201, %result1_37, %arg31, %arg91, %1322, %457, %1330, %result1_46, %1153, %1314, %arg1, %97, %1332, %arg113, %656, %result2_35, %arg67, %arg131, %arg39, %arg195, %arg167, %1221, %334, %26, %1252, %328, %arg150, %result1_34, %517, %1301, %628, %519, %853, %arg189, %result1_4, %result2_56, %1334, %1303, %arg159, %906, %arg45, %279, %439, %212, %result2_41, %1325, %855, %arg153, %1335, %859, %arg82, %877, %489, %arg29, %result2_44, %arg182, %118, %result1_22, %1087, %arg151, %result1_49, %381, %arg146, %1316, %arg141, %1308, %6, %result2_47, %arg193, %947, %1324, %arg93, %622, %arg22, %1326, %arg43, %arg187, %result2_53, %615, %1340, %216, %982, %result2_23, %result1_28, %15, %arg139, %arg194, %arg155, %arg204, %66, %result1_43, %arg47, %667, %1056, %1310, %arg147, %result1_73, %1157, %1329, %arg183, %arg181, %1319, %741, %1300, %384, %1328, %804, %arg105, %1323, %69, %195, %1052, %1266, %1140, %arg18, %result2_38, %223, %300, %arg107, %645, %208, %result1_70, %832, %433, %171, %arg86, %939, %958, %1344, %result1, %562, %1304, %arg75, %1069, %90, %486, %arg65, %arg191, %418, %1147, %313, %arg161, %arg125, %1339, %result1_13, %result2_68, %arg165, %arg19, %426, %1342, %971, %arg69, %result2_8, %arg102, %result2_17, %arg2, %943, %1279, %arg89, %276, %412, %435, %1313, %arg129, %1315, %arg81, %750, %arg63, %1011, %1161, %1273, %1174, %1299, %arg111, %result1_55, %544, %801, %arg185, %930, %arg179, %arg114, %arg49, %699, %1288, %result1_31, %1309, %1044, %1048, %1262, %1305, %19, 
%result2_26, %arg98, %1306, %arg15, %937, %696, %arg85, %229, %761, %174, %result2_71, %arg87, %arg135, %643, %909, %result2_65, %1336, %arg123, %arg55, %arg59, %total_weight, %772, %1333, %result1_67, %1318, %arg130, %1168, %arg133, %247, %754, %arg97, %result1_64, %result2, %1331, %result1_40, %842, %arg21, %result1_25, %1321, %330, %737, %result2_14, %result1_58, %result2_29, %1149, %99, %352, %arg11, %142, %1254, %309, %arg25, %551, %arg109, %1345, %1258, %1343, %arg70, %arg202, %arg207, %414, %arg127, %834, %arg33, %arg57, %846, %624, %538, %arg143, %result1_19, %1302, %result1_1, %202, %1311, %1341, %1327, %37, %arg79, %arg197, %1337, %11, %result2_32, %1312, %649, %422, %236, %727, %arg149, %arg200, %arg177, %arg169, %1063, %1296, %1320, %arg7, %arg163, %951, %1297, %729, %960, %result2_62, %341, %1119, %result2_74, %arg118, %1283, %120, %arg115, %1181, %arg13, %107, %arg73, %1298, %arg53, %1170, %result2_59, %result1_61, %arg51, %arg173, %arg9, %632, %866, %arg145, %131, %arg35, %1065, %636, %748, %405, %1042, %arg83, %1192, %527, %103, %838, %225, %arg171, %1035, %531, %arg166, %arg23, %arg103, %arg6, %111, %1317, %1307, %arg157, %317, %523 : (!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor) -> !torch.list<!torch.tensor>
return %1346 : !torch.list<!torch.tensor>
}
// Class type declaration for the GraphModule object. It declares a single
// method, "forward", bound to the private function symbol
// @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule.forward.
torch.class_type @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule {
torch.method "forward", @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule.forward
}
// Module object whose type is the GraphModule class; its region is empty, so
// no attributes are set on it here.
%0 = torch.nn_module {
} : !torch.nn.Module<"__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule">
}
// -----// IR Dump After GlobalizeObjectGraph //----- //
module attributes {torch.debug_module_name = "GraphModule"} {
func @forward(%arg0: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg1: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg2: !torch.tensor {torch.type_bound = !torch.vtensor<[512,768],f32>}, %arg3: !torch.tensor {torch.type_bound = !torch.vtensor<[2,768],f32>}, %arg4: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg5: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg6: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg7: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg8: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg9: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg10: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg11: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg12: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg13: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg14: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg15: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg16: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg17: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg18: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg19: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg20: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg21: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg22: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg23: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg24: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg25: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg26: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg27: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768],f32>}, %arg28: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg29: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg30: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg31: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg32: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg33: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg34: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg35: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg36: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg37: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg38: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg39: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg40: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg41: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg42: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg43: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg44: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg45: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg46: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg47: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg48: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg49: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg50: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg51: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg52: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg53: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg54: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg55: !torch.tensor 
{torch.type_bound = !torch.vtensor<[768],f32>}, %arg56: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg57: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg58: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg59: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg60: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg61: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg62: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg63: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg64: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg65: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg66: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg67: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg68: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg69: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg70: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg71: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg72: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg73: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg74: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg75: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg76: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg77: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg78: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg79: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg80: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg81: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg82: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg83: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg84: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg85: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg86: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg87: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg88: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg89: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg90: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg91: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg92: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg93: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg94: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg95: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg96: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg97: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg98: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg99: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg100: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg101: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg102: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg103: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg104: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg105: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg106: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg107: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg108: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg109: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg110: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768,768],f32>}, %arg111: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg112: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg113: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg114: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg115: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg116: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg117: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg118: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg119: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg120: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg121: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg122: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg123: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg124: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg125: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg126: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg127: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg128: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg129: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg130: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg131: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg132: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg133: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg134: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg135: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg136: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg137: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg138: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg139: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg140: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg141: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg142: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg143: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg144: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg145: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg146: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg147: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg148: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg149: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg150: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg151: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg152: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg153: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg154: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg155: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg156: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg157: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg158: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg159: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg160: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg161: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg162: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg163: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg164: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg165: !torch.tensor {torch.type_bound 
= !torch.vtensor<[768],f32>}, %arg166: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg167: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg168: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg169: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg170: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg171: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg172: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg173: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg174: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg175: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg176: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg177: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg178: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg179: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg180: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg181: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg182: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg183: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg184: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg185: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg186: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg187: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg188: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg189: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg190: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg191: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg192: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, 
%arg193: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg194: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg195: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg196: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg197: !torch.tensor {torch.type_bound = !torch.vtensor<[30522],f32>}, %arg198: !torch.tensor {torch.type_bound = !torch.vtensor<[30522],f32>}, %arg199: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg200: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg201: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg202: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg203: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg204: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg205: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg206: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}, %arg207: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}) -> !torch.list<!torch.tensor> {
%none = torch.constant.none
%false = torch.constant.bool false
%int0 = torch.constant.int 0
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int4 = torch.constant.int 4
%int512 = torch.constant.int 512
%int768 = torch.constant.int 768
%float9.999990e-13 = torch.constant.float 9.9999999999999998E-13
%float9.000000e-01 = torch.constant.float 9.000000e-01
%int2048 = torch.constant.int 2048
%int12 = torch.constant.int 12
%int64 = torch.constant.int 64
%int2 = torch.constant.int 2
%int1 = torch.constant.int 1
%int3 = torch.constant.int 3
%int-1 = torch.constant.int -1
%int-2 = torch.constant.int -2
%int48 = torch.constant.int 48
%int3072 = torch.constant.int 3072
%int30522 = torch.constant.int 30522
%int-100 = torch.constant.int -100
%0 = torch.tensor.literal(dense<-0.000000e+00> : tensor<4x1x1x512xf32>) : !torch.tensor<[4,1,1,512],f32>
%1 = torch.tensor.literal(dense<8.000000e+00> : tensor<f64>) : !torch.tensor<[],f64>
%2 = torch.aten.slice.Tensor %arg205, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%3 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%4 = torch.aten.expand %2, %3, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%5 = torch.aten.slice.Tensor %arg204, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%6 = torch.aten.embedding %arg4, %arg206, %int0, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%7 = torch.aten.embedding %arg3, %4, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%8 = torch.aten.add.Tensor %6, %7, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%9 = torch.aten.embedding %arg2, %5, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%10 = torch.aten.add_.Tensor %8, %9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%11 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0, %result1, %result2 = torch.aten.native_layer_norm %10, %11, %arg1, %arg0, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%12 = torch.aten.empty_like %result0, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%13 = torch.operator "aten.bernoulli_.float"(%12, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%14 = torch.aten.div_.Scalar %13, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%15 = torch.aten.mul.Tensor %result0, %14 : !torch.tensor, !torch.tensor -> !torch.tensor
%16 = torch.aten.t %arg12 : !torch.tensor -> !torch.tensor
%17 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%18 = torch.aten.view %15, %17 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%19 = torch.aten.mm %18, %16 : !torch.tensor, !torch.tensor -> !torch.tensor
%20 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%21 = torch.operator "aten._unsafe_view"(%19, %20) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%22 = torch.aten.add_.Tensor %21, %arg11, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%23 = torch.aten.t %arg10 : !torch.tensor -> !torch.tensor
%24 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%25 = torch.aten.view %15, %24 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%26 = torch.aten.mm %25, %23 : !torch.tensor, !torch.tensor -> !torch.tensor
%27 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%28 = torch.operator "aten._unsafe_view"(%26, %27) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%29 = torch.aten.add_.Tensor %28, %arg9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%30 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%31 = torch.aten.view %29, %30 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%32 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%33 = torch.aten.permute %31, %32 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%34 = torch.aten.t %arg14 : !torch.tensor -> !torch.tensor
%35 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%36 = torch.aten.view %15, %35 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%37 = torch.aten.mm %36, %34 : !torch.tensor, !torch.tensor -> !torch.tensor
%38 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%39 = torch.operator "aten._unsafe_view"(%37, %38) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%40 = torch.aten.add_.Tensor %39, %arg13, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%41 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%42 = torch.aten.view %40, %41 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%43 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%44 = torch.aten.permute %42, %43 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%45 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%46 = torch.aten.view %22, %45 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%47 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%48 = torch.aten.permute %46, %47 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%49 = torch.aten.transpose.int %33, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%50 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%51 = torch.aten.expand %48, %50, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%52 = torch.operator "aten.clone"(%51, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%53 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%54 = torch.operator "aten._unsafe_view"(%52, %53) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%55 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%56 = torch.aten.expand %49, %55, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%57 = torch.operator "aten.clone"(%56, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%58 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%59 = torch.operator "aten._unsafe_view"(%57, %58) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%60 = torch.aten.bmm %54, %59 : !torch.tensor, !torch.tensor -> !torch.tensor
%61 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%62 = torch.operator "aten._unsafe_view"(%60, %61) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%63 = torch.aten.div.Tensor %62, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%64 = torch.aten.add.Tensor %63, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%65 = torch.aten._softmax %64, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%66 = torch.aten.empty_like %65, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%67 = torch.operator "aten.bernoulli_.float"(%66, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%68 = torch.aten.div_.Scalar %67, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%69 = torch.aten.mul.Tensor %65, %68 : !torch.tensor, !torch.tensor -> !torch.tensor
%70 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%71 = torch.aten.expand %69, %70, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%72 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%73 = torch.aten.view %71, %72 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%74 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%75 = torch.aten.expand %44, %74, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%76 = torch.operator "aten.clone"(%75, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%77 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%78 = torch.operator "aten._unsafe_view"(%76, %77) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%79 = torch.aten.bmm %73, %78 : !torch.tensor, !torch.tensor -> !torch.tensor
%80 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%81 = torch.operator "aten._unsafe_view"(%79, %80) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%82 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%83 = torch.aten.permute %81, %82 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%84 = torch.operator "aten.clone"(%83, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%85 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%86 = torch.aten.view %84, %85 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%87 = torch.aten.t %arg8 : !torch.tensor -> !torch.tensor
%88 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%89 = torch.aten.view %86, %88 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%90 = torch.aten.mm %89, %87 : !torch.tensor, !torch.tensor -> !torch.tensor
%91 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%92 = torch.operator "aten._unsafe_view"(%90, %91) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%93 = torch.aten.add_.Tensor %92, %arg7, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%94 = torch.aten.empty_like %93, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%95 = torch.operator "aten.bernoulli_.float"(%94, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%96 = torch.aten.div_.Scalar %95, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%97 = torch.aten.mul.Tensor %93, %96 : !torch.tensor, !torch.tensor -> !torch.tensor
%98 = torch.aten.add.Tensor %97, %15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%99 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_0, %result1_1, %result2_2 = torch.aten.native_layer_norm %98, %99, %arg6, %arg5, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%100 = torch.aten.t %arg16 : !torch.tensor -> !torch.tensor
%101 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%102 = torch.aten.view %result0_0, %101 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%103 = torch.aten.mm %102, %100 : !torch.tensor, !torch.tensor -> !torch.tensor
%104 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%105 = torch.operator "aten._unsafe_view"(%103, %104) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%106 = torch.aten.add_.Tensor %105, %arg15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%107 = torch.aten.gelu %106 : !torch.tensor -> !torch.tensor
%108 = torch.aten.t %arg20 : !torch.tensor -> !torch.tensor
%109 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%110 = torch.aten.view %107, %109 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%111 = torch.aten.mm %110, %108 : !torch.tensor, !torch.tensor -> !torch.tensor
%112 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%113 = torch.operator "aten._unsafe_view"(%111, %112) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%114 = torch.aten.add_.Tensor %113, %arg19, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%115 = torch.aten.empty_like %114, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%116 = torch.operator "aten.bernoulli_.float"(%115, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%117 = torch.aten.div_.Scalar %116, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%118 = torch.aten.mul.Tensor %114, %117 : !torch.tensor, !torch.tensor -> !torch.tensor
%119 = torch.aten.add.Tensor %118, %result0_0, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%120 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_3, %result1_4, %result2_5 = torch.aten.native_layer_norm %119, %120, %arg18, %arg17, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%121 = torch.aten.t %arg28 : !torch.tensor -> !torch.tensor
%122 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%123 = torch.aten.view %result0_3, %122 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%124 = torch.aten.mm %123, %121 : !torch.tensor, !torch.tensor -> !torch.tensor
// Attention-style sub-graph followed by a residual add and a layer norm; produces %result0_6.
// NOTE(review): %124, %result0_3, %0, %1 and every %int*/%float*/%none/%false constant are defined above this
// excerpt. Shape remarks below assume each %intN constant holds the value N -- confirm against the definitions.
// Tail of the first projection: reshape %124 with the [4, 512, 768] size list and add %arg27 in place.
%125 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%126 = torch.operator "aten._unsafe_view"(%124, %125) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%127 = torch.aten.add_.Tensor %126, %arg27, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Second projection of %result0_3: view as [2048, 768], mm with t(%arg26), reshape to [4, 512, 768], add %arg25 in place.
%128 = torch.aten.t %arg26 : !torch.tensor -> !torch.tensor
%129 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%130 = torch.aten.view %result0_3, %129 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%131 = torch.aten.mm %130, %128 : !torch.tensor, !torch.tensor -> !torch.tensor
%132 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%133 = torch.operator "aten._unsafe_view"(%131, %132) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%134 = torch.aten.add_.Tensor %133, %arg25, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// View the second projection as [4, 512, 12, 64] and permute its dims to (0, 2, 1, 3).
%135 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%136 = torch.aten.view %134, %135 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%137 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%138 = torch.aten.permute %136, %137 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Third projection of %result0_3 (mm with t(%arg30), add %arg29), followed by the same view/permute.
%139 = torch.aten.t %arg30 : !torch.tensor -> !torch.tensor
%140 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%141 = torch.aten.view %result0_3, %140 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%142 = torch.aten.mm %141, %139 : !torch.tensor, !torch.tensor -> !torch.tensor
%143 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%144 = torch.operator "aten._unsafe_view"(%142, %143) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%145 = torch.aten.add_.Tensor %144, %arg29, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%146 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%147 = torch.aten.view %145, %146 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%148 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%149 = torch.aten.permute %147, %148 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Same view/permute applied to the first projection (%127).
%150 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%151 = torch.aten.view %127, %150 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%152 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%153 = torch.aten.permute %151, %152 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dims of the second projection; it becomes the right-hand operand of the first bmm.
// NOTE(review): %153 / %154 / %149 presumably play the query / key^T / value roles of attention -- confirm.
%154 = torch.aten.transpose.int %138, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Expand, clone and flatten both operands to 3-D ([48, 512, 64] and [48, 64, 512]), then batch-multiply them.
%155 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%156 = torch.aten.expand %153, %155, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%157 = torch.operator "aten.clone"(%156, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%158 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%159 = torch.operator "aten._unsafe_view"(%157, %158) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%160 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%161 = torch.aten.expand %154, %160, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%162 = torch.operator "aten.clone"(%161, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%163 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%164 = torch.operator "aten._unsafe_view"(%162, %163) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%165 = torch.aten.bmm %159, %164 : !torch.tensor, !torch.tensor -> !torch.tensor
// Reshape to [4, 12, 512, 512], divide by %1 (typed tensor<[],f64>), add %0 (typed tensor<[4,1,1,512],f32>),
// then softmax along dim %int-1.
%166 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%167 = torch.operator "aten._unsafe_view"(%165, %166) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%168 = torch.aten.div.Tensor %167, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%169 = torch.aten.add.Tensor %168, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%170 = torch.aten._softmax %169, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask: empty_like -> in-place Bernoulli fill -> in-place divide by the same float -> multiply into %170.
// NOTE(review): looks like a decomposed dropout with keep probability 0.9 -- confirm.
%171 = torch.aten.empty_like %170, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%172 = torch.operator "aten.bernoulli_.float"(%171, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%173 = torch.aten.div_.Scalar %172, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%174 = torch.aten.mul.Tensor %170, %173 : !torch.tensor, !torch.tensor -> !torch.tensor
// Flatten the masked weights to [48, 512, 512] and the third projection to [48, 512, 64], then batch-multiply.
%175 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%176 = torch.aten.expand %174, %175, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%177 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%178 = torch.aten.view %176, %177 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%179 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%180 = torch.aten.expand %149, %179, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%181 = torch.operator "aten.clone"(%180, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%182 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%183 = torch.operator "aten._unsafe_view"(%181, %182) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%184 = torch.aten.bmm %178, %183 : !torch.tensor, !torch.tensor -> !torch.tensor
// Reshape to [4, 12, 512, 64], permute (0, 2, 1, 3), clone, and view as [4, 512, 768].
%185 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%186 = torch.operator "aten._unsafe_view"(%184, %185) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%187 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%188 = torch.aten.permute %186, %187 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%189 = torch.operator "aten.clone"(%188, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%190 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%191 = torch.aten.view %189, %190 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: view as [2048, 768], mm with t(%arg24), reshape to [4, 512, 768], add %arg23 in place.
%192 = torch.aten.t %arg24 : !torch.tensor -> !torch.tensor
%193 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%194 = torch.aten.view %191, %193 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%195 = torch.aten.mm %194, %192 : !torch.tensor, !torch.tensor -> !torch.tensor
%196 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%197 = torch.operator "aten._unsafe_view"(%195, %196) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%198 = torch.aten.add_.Tensor %197, %arg23, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Second random mask (same empty_like / bernoulli_ / div_ / mul pattern), applied to the projection output %198.
%199 = torch.aten.empty_like %198, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%200 = torch.operator "aten.bernoulli_.float"(%199, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%201 = torch.aten.div_.Scalar %200, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%202 = torch.aten.mul.Tensor %198, %201 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with %result0_3, then native_layer_norm with size list [%int768] and tensor operands %arg22, %arg21.
%203 = torch.aten.add.Tensor %202, %result0_3, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%204 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_6, %result1_7, %result2_8 = torch.aten.native_layer_norm %203, %204, %arg22, %arg21, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward-style sub-graph: two matmul+bias projections with a GELU between them, then a residual add and a
// layer norm; produces %result0_9.
// NOTE(review): shape remarks assume each %intN constant (defined above this excerpt) holds the value N -- confirm.
// First projection: view %result0_6 as [2048, 768], mm with t(%arg32), reshape to [4, 512, 3072], add %arg31 in place.
%205 = torch.aten.t %arg32 : !torch.tensor -> !torch.tensor
%206 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%207 = torch.aten.view %result0_6, %206 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%208 = torch.aten.mm %207, %205 : !torch.tensor, !torch.tensor -> !torch.tensor
%209 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%210 = torch.operator "aten._unsafe_view"(%208, %209) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%211 = torch.aten.add_.Tensor %210, %arg31, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// GELU applied to the first projection.
%212 = torch.aten.gelu %211 : !torch.tensor -> !torch.tensor
// Second projection: view as [2048, 3072], mm with t(%arg36), reshape to [4, 512, 768], add %arg35 in place.
%213 = torch.aten.t %arg36 : !torch.tensor -> !torch.tensor
%214 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%215 = torch.aten.view %212, %214 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%216 = torch.aten.mm %215, %213 : !torch.tensor, !torch.tensor -> !torch.tensor
%217 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%218 = torch.operator "aten._unsafe_view"(%216, %217) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%219 = torch.aten.add_.Tensor %218, %arg35, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask: empty_like -> in-place Bernoulli fill -> in-place divide by the same float -> multiply into %219.
// NOTE(review): looks like a decomposed dropout with keep probability 0.9 -- confirm.
%220 = torch.aten.empty_like %219, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%221 = torch.operator "aten.bernoulli_.float"(%220, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%222 = torch.aten.div_.Scalar %221, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%223 = torch.aten.mul.Tensor %219, %222 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with %result0_6, then native_layer_norm with size list [%int768] and tensor operands %arg34, %arg33.
%224 = torch.aten.add.Tensor %223, %result0_6, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%225 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_9, %result1_10, %result2_11 = torch.aten.native_layer_norm %224, %225, %arg34, %arg33, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Attention-style sub-graph over %result0_9, followed by a residual add and a layer norm; produces %result0_12.
// NOTE(review): %0, %1 and every %int*/%float*/%none/%false constant are defined above this excerpt. Shape remarks
// below assume each %intN constant holds the value N -- confirm against the definitions.
// First projection of %result0_9: view as [2048, 768], mm with t(%arg76), reshape to [4, 512, 768], add %arg75 in place.
%226 = torch.aten.t %arg76 : !torch.tensor -> !torch.tensor
%227 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%228 = torch.aten.view %result0_9, %227 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%229 = torch.aten.mm %228, %226 : !torch.tensor, !torch.tensor -> !torch.tensor
%230 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%231 = torch.operator "aten._unsafe_view"(%229, %230) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%232 = torch.aten.add_.Tensor %231, %arg75, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Second projection of %result0_9 (mm with t(%arg74), add %arg73), then view as [4, 512, 12, 64] and permute (0, 2, 1, 3).
%233 = torch.aten.t %arg74 : !torch.tensor -> !torch.tensor
%234 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%235 = torch.aten.view %result0_9, %234 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%236 = torch.aten.mm %235, %233 : !torch.tensor, !torch.tensor -> !torch.tensor
%237 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%238 = torch.operator "aten._unsafe_view"(%236, %237) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%239 = torch.aten.add_.Tensor %238, %arg73, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%240 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%241 = torch.aten.view %239, %240 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%242 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%243 = torch.aten.permute %241, %242 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Third projection of %result0_9 (mm with t(%arg78), add %arg77), followed by the same view/permute.
%244 = torch.aten.t %arg78 : !torch.tensor -> !torch.tensor
%245 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%246 = torch.aten.view %result0_9, %245 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%247 = torch.aten.mm %246, %244 : !torch.tensor, !torch.tensor -> !torch.tensor
%248 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%249 = torch.operator "aten._unsafe_view"(%247, %248) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%250 = torch.aten.add_.Tensor %249, %arg77, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%251 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%252 = torch.aten.view %250, %251 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%253 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%254 = torch.aten.permute %252, %253 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Same view/permute applied to the first projection (%232).
%255 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%256 = torch.aten.view %232, %255 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%257 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%258 = torch.aten.permute %256, %257 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dims of the second projection; it becomes the right-hand operand of the first bmm.
// NOTE(review): %258 / %259 / %254 presumably play the query / key^T / value roles of attention -- confirm.
%259 = torch.aten.transpose.int %243, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Expand, clone and flatten both operands to 3-D ([48, 512, 64] and [48, 64, 512]), then batch-multiply them.
%260 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%261 = torch.aten.expand %258, %260, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%262 = torch.operator "aten.clone"(%261, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%263 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%264 = torch.operator "aten._unsafe_view"(%262, %263) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%265 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%266 = torch.aten.expand %259, %265, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%267 = torch.operator "aten.clone"(%266, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%268 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%269 = torch.operator "aten._unsafe_view"(%267, %268) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%270 = torch.aten.bmm %264, %269 : !torch.tensor, !torch.tensor -> !torch.tensor
// Reshape to [4, 12, 512, 512], divide by %1 (typed tensor<[],f64>), add %0 (typed tensor<[4,1,1,512],f32>),
// then softmax along dim %int-1.
%271 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%272 = torch.operator "aten._unsafe_view"(%270, %271) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%273 = torch.aten.div.Tensor %272, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%274 = torch.aten.add.Tensor %273, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%275 = torch.aten._softmax %274, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask: empty_like -> in-place Bernoulli fill -> in-place divide by the same float -> multiply into %275.
// NOTE(review): looks like a decomposed dropout with keep probability 0.9 -- confirm.
%276 = torch.aten.empty_like %275, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%277 = torch.operator "aten.bernoulli_.float"(%276, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%278 = torch.aten.div_.Scalar %277, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%279 = torch.aten.mul.Tensor %275, %278 : !torch.tensor, !torch.tensor -> !torch.tensor
// Flatten the masked weights to [48, 512, 512] and the third projection to [48, 512, 64], then batch-multiply.
%280 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%281 = torch.aten.expand %279, %280, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%282 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%283 = torch.aten.view %281, %282 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%284 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%285 = torch.aten.expand %254, %284, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%286 = torch.operator "aten.clone"(%285, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%287 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%288 = torch.operator "aten._unsafe_view"(%286, %287) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%289 = torch.aten.bmm %283, %288 : !torch.tensor, !torch.tensor -> !torch.tensor
// Reshape to [4, 12, 512, 64], permute (0, 2, 1, 3), clone, and view as [4, 512, 768].
%290 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%291 = torch.operator "aten._unsafe_view"(%289, %290) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%292 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%293 = torch.aten.permute %291, %292 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%294 = torch.operator "aten.clone"(%293, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%295 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%296 = torch.aten.view %294, %295 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: view as [2048, 768], mm with t(%arg72), reshape to [4, 512, 768], add %arg71 in place.
%297 = torch.aten.t %arg72 : !torch.tensor -> !torch.tensor
%298 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%299 = torch.aten.view %296, %298 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%300 = torch.aten.mm %299, %297 : !torch.tensor, !torch.tensor -> !torch.tensor
%301 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%302 = torch.operator "aten._unsafe_view"(%300, %301) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%303 = torch.aten.add_.Tensor %302, %arg71, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Second random mask (same empty_like / bernoulli_ / div_ / mul pattern), applied to the projection output %303.
%304 = torch.aten.empty_like %303, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%305 = torch.operator "aten.bernoulli_.float"(%304, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%306 = torch.aten.div_.Scalar %305, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%307 = torch.aten.mul.Tensor %303, %306 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with %result0_9, then native_layer_norm with size list [%int768] and tensor operands %arg70, %arg69.
%308 = torch.aten.add.Tensor %307, %result0_9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%309 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_12, %result1_13, %result2_14 = torch.aten.native_layer_norm %308, %309, %arg70, %arg69, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward-style sub-graph: two matmul+bias projections with a GELU between them, then a residual add and a
// layer norm; produces %result0_15.
// NOTE(review): shape remarks assume each %intN constant (defined above this excerpt) holds the value N -- confirm.
// First projection: view %result0_12 as [2048, 768], mm with t(%arg80), reshape to [4, 512, 3072], add %arg79 in place.
%310 = torch.aten.t %arg80 : !torch.tensor -> !torch.tensor
%311 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%312 = torch.aten.view %result0_12, %311 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%313 = torch.aten.mm %312, %310 : !torch.tensor, !torch.tensor -> !torch.tensor
%314 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%315 = torch.operator "aten._unsafe_view"(%313, %314) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%316 = torch.aten.add_.Tensor %315, %arg79, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// GELU applied to the first projection.
%317 = torch.aten.gelu %316 : !torch.tensor -> !torch.tensor
// Second projection: view as [2048, 3072], mm with t(%arg84), reshape to [4, 512, 768], add %arg83 in place.
%318 = torch.aten.t %arg84 : !torch.tensor -> !torch.tensor
%319 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%320 = torch.aten.view %317, %319 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%321 = torch.aten.mm %320, %318 : !torch.tensor, !torch.tensor -> !torch.tensor
%322 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%323 = torch.operator "aten._unsafe_view"(%321, %322) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%324 = torch.aten.add_.Tensor %323, %arg83, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask: empty_like -> in-place Bernoulli fill -> in-place divide by the same float -> multiply into %324.
// NOTE(review): looks like a decomposed dropout with keep probability 0.9 -- confirm.
%325 = torch.aten.empty_like %324, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%326 = torch.operator "aten.bernoulli_.float"(%325, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%327 = torch.aten.div_.Scalar %326, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%328 = torch.aten.mul.Tensor %324, %327 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with %result0_12, then native_layer_norm with size list [%int768] and tensor operands %arg82, %arg81.
%329 = torch.aten.add.Tensor %328, %result0_12, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%330 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_15, %result1_16, %result2_17 = torch.aten.native_layer_norm %329, %330, %arg82, %arg81, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Attention-style sub-graph over %result0_15, followed by a residual add and a layer norm; produces %result0_18.
// NOTE(review): %0, %1 and every %int*/%float*/%none/%false constant are defined above this excerpt. Shape remarks
// below assume each %intN constant holds the value N -- confirm against the definitions.
// First projection of %result0_15: view as [2048, 768], mm with t(%arg92), reshape to [4, 512, 768], add %arg91 in place.
%331 = torch.aten.t %arg92 : !torch.tensor -> !torch.tensor
%332 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%333 = torch.aten.view %result0_15, %332 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%334 = torch.aten.mm %333, %331 : !torch.tensor, !torch.tensor -> !torch.tensor
%335 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%336 = torch.operator "aten._unsafe_view"(%334, %335) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%337 = torch.aten.add_.Tensor %336, %arg91, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Second projection of %result0_15 (mm with t(%arg90), add %arg89), then view as [4, 512, 12, 64] and permute (0, 2, 1, 3).
%338 = torch.aten.t %arg90 : !torch.tensor -> !torch.tensor
%339 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%340 = torch.aten.view %result0_15, %339 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%341 = torch.aten.mm %340, %338 : !torch.tensor, !torch.tensor -> !torch.tensor
%342 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%343 = torch.operator "aten._unsafe_view"(%341, %342) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%344 = torch.aten.add_.Tensor %343, %arg89, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%345 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%346 = torch.aten.view %344, %345 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%347 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%348 = torch.aten.permute %346, %347 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Third projection of %result0_15 (mm with t(%arg94), add %arg93), followed by the same view/permute.
%349 = torch.aten.t %arg94 : !torch.tensor -> !torch.tensor
%350 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%351 = torch.aten.view %result0_15, %350 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%352 = torch.aten.mm %351, %349 : !torch.tensor, !torch.tensor -> !torch.tensor
%353 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%354 = torch.operator "aten._unsafe_view"(%352, %353) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%355 = torch.aten.add_.Tensor %354, %arg93, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%356 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%357 = torch.aten.view %355, %356 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%358 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%359 = torch.aten.permute %357, %358 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Same view/permute applied to the first projection (%337).
%360 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%361 = torch.aten.view %337, %360 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%362 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%363 = torch.aten.permute %361, %362 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dims of the second projection; it becomes the right-hand operand of the first bmm.
// NOTE(review): %363 / %364 / %359 presumably play the query / key^T / value roles of attention -- confirm.
%364 = torch.aten.transpose.int %348, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Expand, clone and flatten both operands to 3-D ([48, 512, 64] and [48, 64, 512]), then batch-multiply them.
%365 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%366 = torch.aten.expand %363, %365, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%367 = torch.operator "aten.clone"(%366, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%368 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%369 = torch.operator "aten._unsafe_view"(%367, %368) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%370 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%371 = torch.aten.expand %364, %370, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%372 = torch.operator "aten.clone"(%371, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%373 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%374 = torch.operator "aten._unsafe_view"(%372, %373) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%375 = torch.aten.bmm %369, %374 : !torch.tensor, !torch.tensor -> !torch.tensor
// Reshape to [4, 12, 512, 512], divide by %1 (typed tensor<[],f64>), add %0 (typed tensor<[4,1,1,512],f32>),
// then softmax along dim %int-1.
%376 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%377 = torch.operator "aten._unsafe_view"(%375, %376) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%378 = torch.aten.div.Tensor %377, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%379 = torch.aten.add.Tensor %378, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%380 = torch.aten._softmax %379, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask: empty_like -> in-place Bernoulli fill -> in-place divide by the same float -> multiply into %380.
// NOTE(review): looks like a decomposed dropout with keep probability 0.9 -- confirm.
%381 = torch.aten.empty_like %380, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%382 = torch.operator "aten.bernoulli_.float"(%381, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%383 = torch.aten.div_.Scalar %382, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%384 = torch.aten.mul.Tensor %380, %383 : !torch.tensor, !torch.tensor -> !torch.tensor
// Flatten the masked weights to [48, 512, 512] and the third projection to [48, 512, 64], then batch-multiply.
%385 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%386 = torch.aten.expand %384, %385, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%387 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%388 = torch.aten.view %386, %387 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%389 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%390 = torch.aten.expand %359, %389, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%391 = torch.operator "aten.clone"(%390, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%392 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%393 = torch.operator "aten._unsafe_view"(%391, %392) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%394 = torch.aten.bmm %388, %393 : !torch.tensor, !torch.tensor -> !torch.tensor
// Reshape to [4, 12, 512, 64], permute (0, 2, 1, 3), clone, and view as [4, 512, 768].
%395 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%396 = torch.operator "aten._unsafe_view"(%394, %395) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%397 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%398 = torch.aten.permute %396, %397 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%399 = torch.operator "aten.clone"(%398, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%400 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%401 = torch.aten.view %399, %400 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: view as [2048, 768], mm with t(%arg88), reshape to [4, 512, 768], add %arg87 in place.
%402 = torch.aten.t %arg88 : !torch.tensor -> !torch.tensor
%403 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%404 = torch.aten.view %401, %403 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%405 = torch.aten.mm %404, %402 : !torch.tensor, !torch.tensor -> !torch.tensor
%406 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%407 = torch.operator "aten._unsafe_view"(%405, %406) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%408 = torch.aten.add_.Tensor %407, %arg87, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Second random mask (same empty_like / bernoulli_ / div_ / mul pattern), applied to the projection output %408.
%409 = torch.aten.empty_like %408, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%410 = torch.operator "aten.bernoulli_.float"(%409, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%411 = torch.aten.div_.Scalar %410, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%412 = torch.aten.mul.Tensor %408, %411 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with %result0_15, then native_layer_norm with size list [%int768] and tensor operands %arg86, %arg85.
%413 = torch.aten.add.Tensor %412, %result0_15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%414 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_18, %result1_19, %result2_20 = torch.aten.native_layer_norm %413, %414, %arg86, %arg85, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward-style sub-graph: two matmul+bias projections with a GELU between them, then a residual add and a
// layer norm; produces %result0_21.
// NOTE(review): shape remarks assume each %intN constant (defined above this excerpt) holds the value N -- confirm.
// First projection: view %result0_18 as [2048, 768], mm with t(%arg96), reshape to [4, 512, 3072], add %arg95 in place.
%415 = torch.aten.t %arg96 : !torch.tensor -> !torch.tensor
%416 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%417 = torch.aten.view %result0_18, %416 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%418 = torch.aten.mm %417, %415 : !torch.tensor, !torch.tensor -> !torch.tensor
%419 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%420 = torch.operator "aten._unsafe_view"(%418, %419) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%421 = torch.aten.add_.Tensor %420, %arg95, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// GELU applied to the first projection.
%422 = torch.aten.gelu %421 : !torch.tensor -> !torch.tensor
// Second projection: view as [2048, 3072], mm with t(%arg100), reshape to [4, 512, 768], add %arg99 in place.
%423 = torch.aten.t %arg100 : !torch.tensor -> !torch.tensor
%424 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%425 = torch.aten.view %422, %424 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%426 = torch.aten.mm %425, %423 : !torch.tensor, !torch.tensor -> !torch.tensor
%427 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%428 = torch.operator "aten._unsafe_view"(%426, %427) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%429 = torch.aten.add_.Tensor %428, %arg99, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask: empty_like -> in-place Bernoulli fill -> in-place divide by the same float -> multiply into %429.
// NOTE(review): looks like a decomposed dropout with keep probability 0.9 -- confirm.
%430 = torch.aten.empty_like %429, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%431 = torch.operator "aten.bernoulli_.float"(%430, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%432 = torch.aten.div_.Scalar %431, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%433 = torch.aten.mul.Tensor %429, %432 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with %result0_18, then native_layer_norm with size list [%int768] and tensor operands %arg98, %arg97.
%434 = torch.aten.add.Tensor %433, %result0_18, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%435 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_21, %result1_22, %result2_23 = torch.aten.native_layer_norm %434, %435, %arg98, %arg97, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three linear projections of %result0_21. Each one is computed as:
//   view [%int2048, %int768] -> mm with a transposed weight -> _unsafe_view [%int4, %int512, %int768]
//   -> in-place add of a second tensor operand.
// Projection with %arg108 / %arg107 -> %442 (head-split further down into %468, which becomes the left
// operand of the score bmm).
%436 = torch.aten.t %arg108 : !torch.tensor -> !torch.tensor
%437 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%438 = torch.aten.view %result0_21, %437 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%439 = torch.aten.mm %438, %436 : !torch.tensor, !torch.tensor -> !torch.tensor
%440 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%441 = torch.operator "aten._unsafe_view"(%439, %440) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%442 = torch.aten.add_.Tensor %441, %arg107, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection with %arg106 / %arg105 -> %449.
%443 = torch.aten.t %arg106 : !torch.tensor -> !torch.tensor
%444 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%445 = torch.aten.view %result0_21, %444 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%446 = torch.aten.mm %445, %443 : !torch.tensor, !torch.tensor -> !torch.tensor
%447 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%448 = torch.operator "aten._unsafe_view"(%446, %447) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%449 = torch.aten.add_.Tensor %448, %arg105, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Head split of %449: view as [%int4, %int512, %int12, %int64], then permute with order
// [%int0, %int2, %int1, %int3] -> %453.
%450 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%451 = torch.aten.view %449, %450 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%452 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%453 = torch.aten.permute %451, %452 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection with %arg110 / %arg109 -> %460, followed by the same head split -> %464.
%454 = torch.aten.t %arg110 : !torch.tensor -> !torch.tensor
%455 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%456 = torch.aten.view %result0_21, %455 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%457 = torch.aten.mm %456, %454 : !torch.tensor, !torch.tensor -> !torch.tensor
%458 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%459 = torch.operator "aten._unsafe_view"(%457, %458) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%460 = torch.aten.add_.Tensor %459, %arg109, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%461 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%462 = torch.aten.view %460, %461 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%463 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%464 = torch.aten.permute %462, %463 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Head split of the first projection %442 -> %468.
%465 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%466 = torch.aten.view %442, %465 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%467 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%468 = torch.aten.permute %466, %467 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Attention scores: bmm(%468, transpose(%453)) / %1 + %0 -> softmax -> random mask, producing %489.
// Swap the last two dimensions of %453 so it can serve as the right operand of the bmm.
%469 = torch.aten.transpose.int %453, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left operand: expand %468 to [%int4, %int12, %int512, %int64], clone, and collapse to
// [%int48, %int512, %int64].
%470 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%471 = torch.aten.expand %468, %470, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%472 = torch.operator "aten.clone"(%471, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%473 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%474 = torch.operator "aten._unsafe_view"(%472, %473) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right operand: expand %469 to [%int4, %int12, %int64, %int512], clone, and collapse to
// [%int48, %int64, %int512].
%475 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%476 = torch.aten.expand %469, %475, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%477 = torch.operator "aten.clone"(%476, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%478 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%479 = torch.operator "aten._unsafe_view"(%477, %478) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, reshaped back to [%int4, %int12, %int512, %int512].
%480 = torch.aten.bmm %474, %479 : !torch.tensor, !torch.tensor -> !torch.tensor
%481 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%482 = torch.operator "aten._unsafe_view"(%480, %481) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by %1 (a rank-0 f64 tensor) and add %0 (typed [4,1,1,512], f32); both are defined above this chunk.
// NOTE(review): %1 presumably holds the sqrt(head size) scale and %0 the additive attention mask —
// confirm at their definitions.
%483 = torch.aten.div.Tensor %482, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%484 = torch.aten.add.Tensor %483, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
// Softmax along dimension %int-1.
%485 = torch.aten._softmax %484, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// empty_like -> bernoulli_(0.9) -> div_(0.9) -> mul: a random 0/1 mask drawn with probability 0.9 of 1,
// rescaled by 1/0.9 and multiplied into %485 (the inverted-dropout pattern with keep-probability 0.9).
%486 = torch.aten.empty_like %485, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%487 = torch.operator "aten.bernoulli_.float"(%486, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%488 = torch.aten.div_.Scalar %487, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%489 = torch.aten.mul.Tensor %485, %488 : !torch.tensor, !torch.tensor -> !torch.tensor
// Apply the masked softmax output %489 to %464, merge heads, run the output linear, then
// random mask -> residual add -> layer norm, producing %result0_24.
// Left operand: expand %489 to [%int4, %int12, %int512, %int512] and view as [%int48, %int512, %int512].
%490 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%491 = torch.aten.expand %489, %490, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%492 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%493 = torch.aten.view %491, %492 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right operand: expand %464 to [%int4, %int12, %int512, %int64], clone, and collapse to
// [%int48, %int512, %int64].
%494 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%495 = torch.aten.expand %464, %494, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%496 = torch.operator "aten.clone"(%495, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%497 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%498 = torch.operator "aten._unsafe_view"(%496, %497) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, reshaped to [%int4, %int12, %int512, %int64].
%499 = torch.aten.bmm %493, %498 : !torch.tensor, !torch.tensor -> !torch.tensor
%500 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%501 = torch.operator "aten._unsafe_view"(%499, %500) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Merge heads: permute with order [%int0, %int2, %int1, %int3], clone, and view as
// [%int4, %int512, %int768].
%502 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%503 = torch.aten.permute %501, %502 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%504 = torch.operator "aten.clone"(%503, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%505 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%506 = torch.aten.view %504, %505 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output linear: view as [%int2048, %int768], multiply by the transpose of %arg104, reshape to
// [%int4, %int512, %int768], then add %arg103 in place.
%507 = torch.aten.t %arg104 : !torch.tensor -> !torch.tensor
%508 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%509 = torch.aten.view %506, %508 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%510 = torch.aten.mm %509, %507 : !torch.tensor, !torch.tensor -> !torch.tensor
%511 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%512 = torch.operator "aten._unsafe_view"(%510, %511) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%513 = torch.aten.add_.Tensor %512, %arg103, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// empty_like -> bernoulli_(0.9) -> div_(0.9) -> mul: random 0/1 mask (probability 0.9 of 1), rescaled by
// 1/0.9 and multiplied into %513 (the inverted-dropout pattern with keep-probability 0.9).
%514 = torch.aten.empty_like %513, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%515 = torch.operator "aten.bernoulli_.float"(%514, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%516 = torch.aten.div_.Scalar %515, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%517 = torch.aten.mul.Tensor %513, %516 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with %result0_21 (alpha operand is %int1).
%518 = torch.aten.add.Tensor %517, %result0_21, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Layer norm with normalized shape [%int768], tensor operands %arg102 / %arg101 and epsilon operand
// %float9.999990e-13. Within this chunk only the first result (%result0_24) is used.
%519 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %518, %519, %arg102, %arg101, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward sub-layer on %result0_24:
//   linear (%arg112, %arg111) -> GELU -> linear (%arg116, %arg115) -> random mask -> residual add -> layer norm,
// producing %result0_27.
// First linear: view the input as [%int2048, %int768], multiply by the transpose of %arg112, reshape the
// product to [%int4, %int512, %int3072], then add %arg111 with the in-place add_ variant.
%520 = torch.aten.t %arg112 : !torch.tensor -> !torch.tensor
%521 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%522 = torch.aten.view %result0_24, %521 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%523 = torch.aten.mm %522, %520 : !torch.tensor, !torch.tensor -> !torch.tensor
%524 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%525 = torch.operator "aten._unsafe_view"(%523, %524) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%526 = torch.aten.add_.Tensor %525, %arg111, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// GELU activation.
%527 = torch.aten.gelu %526 : !torch.tensor -> !torch.tensor
// Second linear: view as [%int2048, %int3072], multiply by the transpose of %arg116, reshape to
// [%int4, %int512, %int768], then add %arg115 in place.
%528 = torch.aten.t %arg116 : !torch.tensor -> !torch.tensor
%529 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%530 = torch.aten.view %527, %529 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%531 = torch.aten.mm %530, %528 : !torch.tensor, !torch.tensor -> !torch.tensor
%532 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%533 = torch.operator "aten._unsafe_view"(%531, %532) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%534 = torch.aten.add_.Tensor %533, %arg115, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// empty_like -> bernoulli_(0.9) -> div_(0.9) -> mul: a random 0/1 mask drawn with probability 0.9 of 1,
// rescaled by 1/0.9 and multiplied into %534 (the inverted-dropout pattern with keep-probability 0.9).
%535 = torch.aten.empty_like %534, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%536 = torch.operator "aten.bernoulli_.float"(%535, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%537 = torch.aten.div_.Scalar %536, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%538 = torch.aten.mul.Tensor %534, %537 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with the sub-layer input %result0_24 (alpha operand is %int1).
%539 = torch.aten.add.Tensor %538, %result0_24, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Layer norm with normalized shape [%int768], tensor operands %arg114 / %arg113 and epsilon operand
// %float9.999990e-13. Within this chunk only the first result (%result0_27) is used.
%540 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_27, %result1_28, %result2_29 = torch.aten.native_layer_norm %539, %540, %arg114, %arg113, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three linear projections of %result0_27. Each one is computed as:
//   view [%int2048, %int768] -> mm with a transposed weight -> _unsafe_view [%int4, %int512, %int768]
//   -> in-place add of a second tensor operand.
// Projection with %arg124 / %arg123 -> %547 (head-split further down into %573, which becomes the left
// operand of the score bmm).
%541 = torch.aten.t %arg124 : !torch.tensor -> !torch.tensor
%542 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%543 = torch.aten.view %result0_27, %542 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%544 = torch.aten.mm %543, %541 : !torch.tensor, !torch.tensor -> !torch.tensor
%545 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%546 = torch.operator "aten._unsafe_view"(%544, %545) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%547 = torch.aten.add_.Tensor %546, %arg123, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection with %arg122 / %arg121 -> %554.
%548 = torch.aten.t %arg122 : !torch.tensor -> !torch.tensor
%549 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%550 = torch.aten.view %result0_27, %549 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%551 = torch.aten.mm %550, %548 : !torch.tensor, !torch.tensor -> !torch.tensor
%552 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%553 = torch.operator "aten._unsafe_view"(%551, %552) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%554 = torch.aten.add_.Tensor %553, %arg121, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Head split of %554: view as [%int4, %int512, %int12, %int64], then permute with order
// [%int0, %int2, %int1, %int3] -> %558.
%555 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%556 = torch.aten.view %554, %555 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%557 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%558 = torch.aten.permute %556, %557 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection with %arg126 / %arg125 -> %565, followed by the same head split -> %569.
%559 = torch.aten.t %arg126 : !torch.tensor -> !torch.tensor
%560 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%561 = torch.aten.view %result0_27, %560 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%562 = torch.aten.mm %561, %559 : !torch.tensor, !torch.tensor -> !torch.tensor
%563 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%564 = torch.operator "aten._unsafe_view"(%562, %563) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%565 = torch.aten.add_.Tensor %564, %arg125, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%566 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%567 = torch.aten.view %565, %566 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%568 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%569 = torch.aten.permute %567, %568 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Head split of the first projection %547 -> %573.
%570 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%571 = torch.aten.view %547, %570 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%572 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%573 = torch.aten.permute %571, %572 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Attention scores: bmm(%573, transpose(%558)) / %1 + %0 -> softmax -> random mask, producing %594.
// Swap the last two dimensions of %558 so it can serve as the right operand of the bmm.
%574 = torch.aten.transpose.int %558, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left operand: expand %573 to [%int4, %int12, %int512, %int64], clone, and collapse to
// [%int48, %int512, %int64].
%575 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%576 = torch.aten.expand %573, %575, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%577 = torch.operator "aten.clone"(%576, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%578 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%579 = torch.operator "aten._unsafe_view"(%577, %578) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right operand: expand %574 to [%int4, %int12, %int64, %int512], clone, and collapse to
// [%int48, %int64, %int512].
%580 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%581 = torch.aten.expand %574, %580, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%582 = torch.operator "aten.clone"(%581, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%583 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%584 = torch.operator "aten._unsafe_view"(%582, %583) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, reshaped back to [%int4, %int12, %int512, %int512].
%585 = torch.aten.bmm %579, %584 : !torch.tensor, !torch.tensor -> !torch.tensor
%586 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%587 = torch.operator "aten._unsafe_view"(%585, %586) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by %1 (a rank-0 f64 tensor) and add %0 (typed [4,1,1,512], f32); both are defined above this chunk.
// NOTE(review): %1 presumably holds the sqrt(head size) scale and %0 the additive attention mask —
// confirm at their definitions.
%588 = torch.aten.div.Tensor %587, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%589 = torch.aten.add.Tensor %588, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
// Softmax along dimension %int-1.
%590 = torch.aten._softmax %589, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// empty_like -> bernoulli_(0.9) -> div_(0.9) -> mul: a random 0/1 mask drawn with probability 0.9 of 1,
// rescaled by 1/0.9 and multiplied into %590 (the inverted-dropout pattern with keep-probability 0.9).
%591 = torch.aten.empty_like %590, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%592 = torch.operator "aten.bernoulli_.float"(%591, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%593 = torch.aten.div_.Scalar %592, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%594 = torch.aten.mul.Tensor %590, %593 : !torch.tensor, !torch.tensor -> !torch.tensor
// Apply the masked softmax output %594 to %569, merge heads, run the output linear, then
// random mask -> residual add -> layer norm, producing %result0_30.
// Left operand: expand %594 to [%int4, %int12, %int512, %int512] and view as [%int48, %int512, %int512].
%595 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%596 = torch.aten.expand %594, %595, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%597 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%598 = torch.aten.view %596, %597 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right operand: expand %569 to [%int4, %int12, %int512, %int64], clone, and collapse to
// [%int48, %int512, %int64].
%599 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%600 = torch.aten.expand %569, %599, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%601 = torch.operator "aten.clone"(%600, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%602 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%603 = torch.operator "aten._unsafe_view"(%601, %602) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, reshaped to [%int4, %int12, %int512, %int64].
%604 = torch.aten.bmm %598, %603 : !torch.tensor, !torch.tensor -> !torch.tensor
%605 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%606 = torch.operator "aten._unsafe_view"(%604, %605) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Merge heads: permute with order [%int0, %int2, %int1, %int3], clone, and view as
// [%int4, %int512, %int768].
%607 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%608 = torch.aten.permute %606, %607 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%609 = torch.operator "aten.clone"(%608, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%610 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%611 = torch.aten.view %609, %610 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output linear: view as [%int2048, %int768], multiply by the transpose of %arg120, reshape to
// [%int4, %int512, %int768], then add %arg119 in place.
%612 = torch.aten.t %arg120 : !torch.tensor -> !torch.tensor
%613 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%614 = torch.aten.view %611, %613 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%615 = torch.aten.mm %614, %612 : !torch.tensor, !torch.tensor -> !torch.tensor
%616 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%617 = torch.operator "aten._unsafe_view"(%615, %616) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%618 = torch.aten.add_.Tensor %617, %arg119, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// empty_like -> bernoulli_(0.9) -> div_(0.9) -> mul: random 0/1 mask (probability 0.9 of 1), rescaled by
// 1/0.9 and multiplied into %618 (the inverted-dropout pattern with keep-probability 0.9).
%619 = torch.aten.empty_like %618, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%620 = torch.operator "aten.bernoulli_.float"(%619, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%621 = torch.aten.div_.Scalar %620, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%622 = torch.aten.mul.Tensor %618, %621 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with %result0_27 (alpha operand is %int1).
%623 = torch.aten.add.Tensor %622, %result0_27, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Layer norm with normalized shape [%int768], tensor operands %arg118 / %arg117 and epsilon operand
// %float9.999990e-13. Within this chunk only the first result (%result0_30) is used.
%624 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_30, %result1_31, %result2_32 = torch.aten.native_layer_norm %623, %624, %arg118, %arg117, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward sub-layer on %result0_30:
//   linear (%arg128, %arg127) -> GELU -> linear (%arg132, %arg131) -> random mask -> residual add -> layer norm,
// producing %result0_33.
// First linear: view the input as [%int2048, %int768], multiply by the transpose of %arg128, reshape the
// product to [%int4, %int512, %int3072], then add %arg127 with the in-place add_ variant.
%625 = torch.aten.t %arg128 : !torch.tensor -> !torch.tensor
%626 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%627 = torch.aten.view %result0_30, %626 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%628 = torch.aten.mm %627, %625 : !torch.tensor, !torch.tensor -> !torch.tensor
%629 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%630 = torch.operator "aten._unsafe_view"(%628, %629) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%631 = torch.aten.add_.Tensor %630, %arg127, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// GELU activation.
%632 = torch.aten.gelu %631 : !torch.tensor -> !torch.tensor
// Second linear: view as [%int2048, %int3072], multiply by the transpose of %arg132, reshape to
// [%int4, %int512, %int768], then add %arg131 in place.
%633 = torch.aten.t %arg132 : !torch.tensor -> !torch.tensor
%634 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%635 = torch.aten.view %632, %634 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%636 = torch.aten.mm %635, %633 : !torch.tensor, !torch.tensor -> !torch.tensor
%637 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%638 = torch.operator "aten._unsafe_view"(%636, %637) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%639 = torch.aten.add_.Tensor %638, %arg131, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// empty_like -> bernoulli_(0.9) -> div_(0.9) -> mul: a random 0/1 mask drawn with probability 0.9 of 1,
// rescaled by 1/0.9 and multiplied into %639 (the inverted-dropout pattern with keep-probability 0.9).
%640 = torch.aten.empty_like %639, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%641 = torch.operator "aten.bernoulli_.float"(%640, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%642 = torch.aten.div_.Scalar %641, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%643 = torch.aten.mul.Tensor %639, %642 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with the sub-layer input %result0_30 (alpha operand is %int1).
%644 = torch.aten.add.Tensor %643, %result0_30, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Layer norm with normalized shape [%int768], tensor operands %arg130 / %arg129 and epsilon operand
// %float9.999990e-13. Within this chunk only the first result (%result0_33) is used.
%645 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_33, %result1_34, %result2_35 = torch.aten.native_layer_norm %644, %645, %arg130, %arg129, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three linear projections of %result0_33. Each one is computed as:
//   view [%int2048, %int768] -> mm with a transposed weight -> _unsafe_view [%int4, %int512, %int768]
//   -> in-place add of a second tensor operand.
// Projection with %arg140 / %arg139 -> %652 (head-split further down into %678, which becomes the left
// operand of the score bmm).
%646 = torch.aten.t %arg140 : !torch.tensor -> !torch.tensor
%647 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%648 = torch.aten.view %result0_33, %647 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%649 = torch.aten.mm %648, %646 : !torch.tensor, !torch.tensor -> !torch.tensor
%650 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%651 = torch.operator "aten._unsafe_view"(%649, %650) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%652 = torch.aten.add_.Tensor %651, %arg139, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection with %arg138 / %arg137 -> %659.
%653 = torch.aten.t %arg138 : !torch.tensor -> !torch.tensor
%654 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%655 = torch.aten.view %result0_33, %654 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%656 = torch.aten.mm %655, %653 : !torch.tensor, !torch.tensor -> !torch.tensor
%657 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%658 = torch.operator "aten._unsafe_view"(%656, %657) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%659 = torch.aten.add_.Tensor %658, %arg137, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Head split of %659: view as [%int4, %int512, %int12, %int64], then permute with order
// [%int0, %int2, %int1, %int3] -> %663.
%660 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%661 = torch.aten.view %659, %660 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%662 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%663 = torch.aten.permute %661, %662 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection with %arg142 / %arg141 -> %670, followed by the same head split -> %674.
%664 = torch.aten.t %arg142 : !torch.tensor -> !torch.tensor
%665 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%666 = torch.aten.view %result0_33, %665 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%667 = torch.aten.mm %666, %664 : !torch.tensor, !torch.tensor -> !torch.tensor
%668 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%669 = torch.operator "aten._unsafe_view"(%667, %668) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%670 = torch.aten.add_.Tensor %669, %arg141, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%671 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%672 = torch.aten.view %670, %671 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%673 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%674 = torch.aten.permute %672, %673 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Head split of the first projection %652 -> %678.
%675 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%676 = torch.aten.view %652, %675 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%677 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%678 = torch.aten.permute %676, %677 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Attention scores: bmm(%678, transpose(%663)) / %1 + %0 -> softmax -> random mask, producing %699.
// Swap the last two dimensions of %663 so it can serve as the right operand of the bmm.
%679 = torch.aten.transpose.int %663, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left operand: expand %678 to [%int4, %int12, %int512, %int64], clone, and collapse to
// [%int48, %int512, %int64].
%680 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%681 = torch.aten.expand %678, %680, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%682 = torch.operator "aten.clone"(%681, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%683 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%684 = torch.operator "aten._unsafe_view"(%682, %683) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right operand: expand %679 to [%int4, %int12, %int64, %int512], clone, and collapse to
// [%int48, %int64, %int512].
%685 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%686 = torch.aten.expand %679, %685, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%687 = torch.operator "aten.clone"(%686, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%688 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%689 = torch.operator "aten._unsafe_view"(%687, %688) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, reshaped back to [%int4, %int12, %int512, %int512].
%690 = torch.aten.bmm %684, %689 : !torch.tensor, !torch.tensor -> !torch.tensor
%691 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%692 = torch.operator "aten._unsafe_view"(%690, %691) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by %1 (a rank-0 f64 tensor) and add %0 (typed [4,1,1,512], f32); both are defined above this chunk.
// NOTE(review): %1 presumably holds the sqrt(head size) scale and %0 the additive attention mask —
// confirm at their definitions.
%693 = torch.aten.div.Tensor %692, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%694 = torch.aten.add.Tensor %693, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
// Softmax along dimension %int-1.
%695 = torch.aten._softmax %694, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// empty_like -> bernoulli_(0.9) -> div_(0.9) -> mul: a random 0/1 mask drawn with probability 0.9 of 1,
// rescaled by 1/0.9 and multiplied into %695 (the inverted-dropout pattern with keep-probability 0.9).
%696 = torch.aten.empty_like %695, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%697 = torch.operator "aten.bernoulli_.float"(%696, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%698 = torch.aten.div_.Scalar %697, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%699 = torch.aten.mul.Tensor %695, %698 : !torch.tensor, !torch.tensor -> !torch.tensor
// Apply the masked softmax output %699 to %674, merge heads, run the output linear, then
// random mask -> residual add -> layer norm, producing %result0_36.
// Left operand: expand %699 to [%int4, %int12, %int512, %int512] and view as [%int48, %int512, %int512].
%700 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%701 = torch.aten.expand %699, %700, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%702 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%703 = torch.aten.view %701, %702 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right operand: expand %674 to [%int4, %int12, %int512, %int64], clone, and collapse to
// [%int48, %int512, %int64].
%704 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%705 = torch.aten.expand %674, %704, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%706 = torch.operator "aten.clone"(%705, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%707 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%708 = torch.operator "aten._unsafe_view"(%706, %707) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, reshaped to [%int4, %int12, %int512, %int64].
%709 = torch.aten.bmm %703, %708 : !torch.tensor, !torch.tensor -> !torch.tensor
%710 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%711 = torch.operator "aten._unsafe_view"(%709, %710) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Merge heads: permute with order [%int0, %int2, %int1, %int3], clone, and view as
// [%int4, %int512, %int768].
%712 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%713 = torch.aten.permute %711, %712 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%714 = torch.operator "aten.clone"(%713, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%715 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%716 = torch.aten.view %714, %715 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output linear: view as [%int2048, %int768], multiply by the transpose of %arg136, reshape to
// [%int4, %int512, %int768], then add %arg135 in place.
%717 = torch.aten.t %arg136 : !torch.tensor -> !torch.tensor
%718 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%719 = torch.aten.view %716, %718 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%720 = torch.aten.mm %719, %717 : !torch.tensor, !torch.tensor -> !torch.tensor
%721 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%722 = torch.operator "aten._unsafe_view"(%720, %721) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%723 = torch.aten.add_.Tensor %722, %arg135, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// empty_like -> bernoulli_(0.9) -> div_(0.9) -> mul: random 0/1 mask (probability 0.9 of 1), rescaled by
// 1/0.9 and multiplied into %723 (the inverted-dropout pattern with keep-probability 0.9).
%724 = torch.aten.empty_like %723, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%725 = torch.operator "aten.bernoulli_.float"(%724, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%726 = torch.aten.div_.Scalar %725, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%727 = torch.aten.mul.Tensor %723, %726 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with %result0_33 (alpha operand is %int1).
%728 = torch.aten.add.Tensor %727, %result0_33, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Layer norm with normalized shape [%int768], tensor operands %arg134 / %arg133 and epsilon operand
// %float9.999990e-13. Within this chunk only the first result (%result0_36) is used.
%729 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_36, %result1_37, %result2_38 = torch.aten.native_layer_norm %728, %729, %arg134, %arg133, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%730 = torch.aten.t %arg144 : !torch.tensor -> !torch.tensor
%731 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%732 = torch.aten.view %result0_36, %731 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%733 = torch.aten.mm %732, %730 : !torch.tensor, !torch.tensor -> !torch.tensor
%734 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%735 = torch.operator "aten._unsafe_view"(%733, %734) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%736 = torch.aten.add_.Tensor %735, %arg143, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%737 = torch.aten.gelu %736 : !torch.tensor -> !torch.tensor
%738 = torch.aten.t %arg148 : !torch.tensor -> !torch.tensor
%739 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%740 = torch.aten.view %737, %739 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%741 = torch.aten.mm %740, %738 : !torch.tensor, !torch.tensor -> !torch.tensor
%742 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%743 = torch.operator "aten._unsafe_view"(%741, %742) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%744 = torch.aten.add_.Tensor %743, %arg147, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%745 = torch.aten.empty_like %744, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%746 = torch.operator "aten.bernoulli_.float"(%745, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%747 = torch.aten.div_.Scalar %746, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%748 = torch.aten.mul.Tensor %744, %747 : !torch.tensor, !torch.tensor -> !torch.tensor
%749 = torch.aten.add.Tensor %748, %result0_36, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%750 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_39, %result1_40, %result2_41 = torch.aten.native_layer_norm %749, %750, %arg146, %arg145, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%751 = torch.aten.t %arg156 : !torch.tensor -> !torch.tensor
%752 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%753 = torch.aten.view %result0_39, %752 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%754 = torch.aten.mm %753, %751 : !torch.tensor, !torch.tensor -> !torch.tensor
%755 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%756 = torch.operator "aten._unsafe_view"(%754, %755) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%757 = torch.aten.add_.Tensor %756, %arg155, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%758 = torch.aten.t %arg154 : !torch.tensor -> !torch.tensor
%759 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%760 = torch.aten.view %result0_39, %759 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%761 = torch.aten.mm %760, %758 : !torch.tensor, !torch.tensor -> !torch.tensor
%762 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%763 = torch.operator "aten._unsafe_view"(%761, %762) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%764 = torch.aten.add_.Tensor %763, %arg153, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%765 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%766 = torch.aten.view %764, %765 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%767 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%768 = torch.aten.permute %766, %767 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%769 = torch.aten.t %arg158 : !torch.tensor -> !torch.tensor
%770 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%771 = torch.aten.view %result0_39, %770 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%772 = torch.aten.mm %771, %769 : !torch.tensor, !torch.tensor -> !torch.tensor
%773 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%774 = torch.operator "aten._unsafe_view"(%772, %773) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%775 = torch.aten.add_.Tensor %774, %arg157, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%776 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%777 = torch.aten.view %775, %776 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%778 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%779 = torch.aten.permute %777, %778 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%780 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%781 = torch.aten.view %757, %780 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%782 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%783 = torch.aten.permute %781, %782 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%784 = torch.aten.transpose.int %768, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%785 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%786 = torch.aten.expand %783, %785, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%787 = torch.operator "aten.clone"(%786, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%788 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%789 = torch.operator "aten._unsafe_view"(%787, %788) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%790 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%791 = torch.aten.expand %784, %790, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%792 = torch.operator "aten.clone"(%791, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%793 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%794 = torch.operator "aten._unsafe_view"(%792, %793) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%795 = torch.aten.bmm %789, %794 : !torch.tensor, !torch.tensor -> !torch.tensor
%796 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%797 = torch.operator "aten._unsafe_view"(%795, %796) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%798 = torch.aten.div.Tensor %797, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%799 = torch.aten.add.Tensor %798, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%800 = torch.aten._softmax %799, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%801 = torch.aten.empty_like %800, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%802 = torch.operator "aten.bernoulli_.float"(%801, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%803 = torch.aten.div_.Scalar %802, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%804 = torch.aten.mul.Tensor %800, %803 : !torch.tensor, !torch.tensor -> !torch.tensor
%805 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%806 = torch.aten.expand %804, %805, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%807 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%808 = torch.aten.view %806, %807 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%809 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%810 = torch.aten.expand %779, %809, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%811 = torch.operator "aten.clone"(%810, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%812 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%813 = torch.operator "aten._unsafe_view"(%811, %812) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%814 = torch.aten.bmm %808, %813 : !torch.tensor, !torch.tensor -> !torch.tensor
%815 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%816 = torch.operator "aten._unsafe_view"(%814, %815) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%817 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%818 = torch.aten.permute %816, %817 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%819 = torch.operator "aten.clone"(%818, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%820 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%821 = torch.aten.view %819, %820 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%822 = torch.aten.t %arg152 : !torch.tensor -> !torch.tensor
%823 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%824 = torch.aten.view %821, %823 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%825 = torch.aten.mm %824, %822 : !torch.tensor, !torch.tensor -> !torch.tensor
%826 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%827 = torch.operator "aten._unsafe_view"(%825, %826) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%828 = torch.aten.add_.Tensor %827, %arg151, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%829 = torch.aten.empty_like %828, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%830 = torch.operator "aten.bernoulli_.float"(%829, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%831 = torch.aten.div_.Scalar %830, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%832 = torch.aten.mul.Tensor %828, %831 : !torch.tensor, !torch.tensor -> !torch.tensor
%833 = torch.aten.add.Tensor %832, %result0_39, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%834 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_42, %result1_43, %result2_44 = torch.aten.native_layer_norm %833, %834, %arg150, %arg149, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%835 = torch.aten.t %arg160 : !torch.tensor -> !torch.tensor
%836 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%837 = torch.aten.view %result0_42, %836 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%838 = torch.aten.mm %837, %835 : !torch.tensor, !torch.tensor -> !torch.tensor
%839 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%840 = torch.operator "aten._unsafe_view"(%838, %839) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%841 = torch.aten.add_.Tensor %840, %arg159, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%842 = torch.aten.gelu %841 : !torch.tensor -> !torch.tensor
%843 = torch.aten.t %arg164 : !torch.tensor -> !torch.tensor
%844 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%845 = torch.aten.view %842, %844 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%846 = torch.aten.mm %845, %843 : !torch.tensor, !torch.tensor -> !torch.tensor
%847 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%848 = torch.operator "aten._unsafe_view"(%846, %847) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%849 = torch.aten.add_.Tensor %848, %arg163, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%850 = torch.aten.empty_like %849, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%851 = torch.operator "aten.bernoulli_.float"(%850, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%852 = torch.aten.div_.Scalar %851, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%853 = torch.aten.mul.Tensor %849, %852 : !torch.tensor, !torch.tensor -> !torch.tensor
%854 = torch.aten.add.Tensor %853, %result0_42, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%855 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_45, %result1_46, %result2_47 = torch.aten.native_layer_norm %854, %855, %arg162, %arg161, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%856 = torch.aten.t %arg172 : !torch.tensor -> !torch.tensor
%857 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%858 = torch.aten.view %result0_45, %857 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%859 = torch.aten.mm %858, %856 : !torch.tensor, !torch.tensor -> !torch.tensor
%860 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%861 = torch.operator "aten._unsafe_view"(%859, %860) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%862 = torch.aten.add_.Tensor %861, %arg171, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%863 = torch.aten.t %arg170 : !torch.tensor -> !torch.tensor
%864 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%865 = torch.aten.view %result0_45, %864 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%866 = torch.aten.mm %865, %863 : !torch.tensor, !torch.tensor -> !torch.tensor
%867 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%868 = torch.operator "aten._unsafe_view"(%866, %867) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%869 = torch.aten.add_.Tensor %868, %arg169, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%870 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%871 = torch.aten.view %869, %870 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%872 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%873 = torch.aten.permute %871, %872 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%874 = torch.aten.t %arg174 : !torch.tensor -> !torch.tensor
%875 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%876 = torch.aten.view %result0_45, %875 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%877 = torch.aten.mm %876, %874 : !torch.tensor, !torch.tensor -> !torch.tensor
%878 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%879 = torch.operator "aten._unsafe_view"(%877, %878) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%880 = torch.aten.add_.Tensor %879, %arg173, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%881 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%882 = torch.aten.view %880, %881 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%883 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%884 = torch.aten.permute %882, %883 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%885 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%886 = torch.aten.view %862, %885 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%887 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%888 = torch.aten.permute %886, %887 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%889 = torch.aten.transpose.int %873, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%890 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%891 = torch.aten.expand %888, %890, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%892 = torch.operator "aten.clone"(%891, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%893 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%894 = torch.operator "aten._unsafe_view"(%892, %893) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%895 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%896 = torch.aten.expand %889, %895, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%897 = torch.operator "aten.clone"(%896, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%898 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%899 = torch.operator "aten._unsafe_view"(%897, %898) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%900 = torch.aten.bmm %894, %899 : !torch.tensor, !torch.tensor -> !torch.tensor
%901 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%902 = torch.operator "aten._unsafe_view"(%900, %901) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%903 = torch.aten.div.Tensor %902, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%904 = torch.aten.add.Tensor %903, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%905 = torch.aten._softmax %904, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%906 = torch.aten.empty_like %905, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%907 = torch.operator "aten.bernoulli_.float"(%906, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%908 = torch.aten.div_.Scalar %907, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%909 = torch.aten.mul.Tensor %905, %908 : !torch.tensor, !torch.tensor -> !torch.tensor
%910 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%911 = torch.aten.expand %909, %910, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%912 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%913 = torch.aten.view %911, %912 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%914 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%915 = torch.aten.expand %884, %914, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%916 = torch.operator "aten.clone"(%915, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%917 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%918 = torch.operator "aten._unsafe_view"(%916, %917) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%919 = torch.aten.bmm %913, %918 : !torch.tensor, !torch.tensor -> !torch.tensor
%920 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%921 = torch.operator "aten._unsafe_view"(%919, %920) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%922 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%923 = torch.aten.permute %921, %922 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%924 = torch.operator "aten.clone"(%923, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%925 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%926 = torch.aten.view %924, %925 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%927 = torch.aten.t %arg168 : !torch.tensor -> !torch.tensor
%928 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%929 = torch.aten.view %926, %928 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%930 = torch.aten.mm %929, %927 : !torch.tensor, !torch.tensor -> !torch.tensor
%931 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%932 = torch.operator "aten._unsafe_view"(%930, %931) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%933 = torch.aten.add_.Tensor %932, %arg167, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%934 = torch.aten.empty_like %933, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%935 = torch.operator "aten.bernoulli_.float"(%934, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%936 = torch.aten.div_.Scalar %935, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%937 = torch.aten.mul.Tensor %933, %936 : !torch.tensor, !torch.tensor -> !torch.tensor
%938 = torch.aten.add.Tensor %937, %result0_45, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%939 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %938, %939, %arg166, %arg165, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%940 = torch.aten.t %arg176 : !torch.tensor -> !torch.tensor
%941 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%942 = torch.aten.view %result0_48, %941 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%943 = torch.aten.mm %942, %940 : !torch.tensor, !torch.tensor -> !torch.tensor
%944 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%945 = torch.operator "aten._unsafe_view"(%943, %944) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%946 = torch.aten.add_.Tensor %945, %arg175, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%947 = torch.aten.gelu %946 : !torch.tensor -> !torch.tensor
%948 = torch.aten.t %arg180 : !torch.tensor -> !torch.tensor
%949 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%950 = torch.aten.view %947, %949 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%951 = torch.aten.mm %950, %948 : !torch.tensor, !torch.tensor -> !torch.tensor
%952 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%953 = torch.operator "aten._unsafe_view"(%951, %952) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%954 = torch.aten.add_.Tensor %953, %arg179, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%955 = torch.aten.empty_like %954, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%956 = torch.operator "aten.bernoulli_.float"(%955, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%957 = torch.aten.div_.Scalar %956, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%958 = torch.aten.mul.Tensor %954, %957 : !torch.tensor, !torch.tensor -> !torch.tensor
%959 = torch.aten.add.Tensor %958, %result0_48, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%960 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %959, %960, %arg178, %arg177, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%961 = torch.aten.t %arg188 : !torch.tensor -> !torch.tensor
%962 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%963 = torch.aten.view %result0_51, %962 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%964 = torch.aten.mm %963, %961 : !torch.tensor, !torch.tensor -> !torch.tensor
%965 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%966 = torch.operator "aten._unsafe_view"(%964, %965) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%967 = torch.aten.add_.Tensor %966, %arg187, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%968 = torch.aten.t %arg186 : !torch.tensor -> !torch.tensor
%969 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%970 = torch.aten.view %result0_51, %969 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%971 = torch.aten.mm %970, %968 : !torch.tensor, !torch.tensor -> !torch.tensor
%972 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%973 = torch.operator "aten._unsafe_view"(%971, %972) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%974 = torch.aten.add_.Tensor %973, %arg185, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%975 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%976 = torch.aten.view %974, %975 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%977 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%978 = torch.aten.permute %976, %977 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%979 = torch.aten.t %arg190 : !torch.tensor -> !torch.tensor
%980 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%981 = torch.aten.view %result0_51, %980 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%982 = torch.aten.mm %981, %979 : !torch.tensor, !torch.tensor -> !torch.tensor
%983 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%984 = torch.operator "aten._unsafe_view"(%982, %983) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%985 = torch.aten.add_.Tensor %984, %arg189, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%986 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%987 = torch.aten.view %985, %986 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%988 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%989 = torch.aten.permute %987, %988 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%990 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%991 = torch.aten.view %967, %990 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%992 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%993 = torch.aten.permute %991, %992 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%994 = torch.aten.transpose.int %978, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%995 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%996 = torch.aten.expand %993, %995, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%997 = torch.operator "aten.clone"(%996, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%998 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%999 = torch.operator "aten._unsafe_view"(%997, %998) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1000 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1001 = torch.aten.expand %994, %1000, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1002 = torch.operator "aten.clone"(%1001, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1003 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1004 = torch.operator "aten._unsafe_view"(%1002, %1003) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1005 = torch.aten.bmm %999, %1004 : !torch.tensor, !torch.tensor -> !torch.tensor
%1006 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1007 = torch.operator "aten._unsafe_view"(%1005, %1006) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1008 = torch.aten.div.Tensor %1007, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1009 = torch.aten.add.Tensor %1008, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1010 = torch.aten._softmax %1009, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1011 = torch.aten.empty_like %1010, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1012 = torch.operator "aten.bernoulli_.float"(%1011, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1013 = torch.aten.div_.Scalar %1012, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1014 = torch.aten.mul.Tensor %1010, %1013 : !torch.tensor, !torch.tensor -> !torch.tensor
%1015 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1016 = torch.aten.expand %1014, %1015, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1017 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1018 = torch.aten.view %1016, %1017 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1019 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1020 = torch.aten.expand %989, %1019, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1021 = torch.operator "aten.clone"(%1020, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1022 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1023 = torch.operator "aten._unsafe_view"(%1021, %1022) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1024 = torch.aten.bmm %1018, %1023 : !torch.tensor, !torch.tensor -> !torch.tensor
%1025 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1026 = torch.operator "aten._unsafe_view"(%1024, %1025) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1027 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1028 = torch.aten.permute %1026, %1027 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1029 = torch.operator "aten.clone"(%1028, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1030 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1031 = torch.aten.view %1029, %1030 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1032 = torch.aten.t %arg184 : !torch.tensor -> !torch.tensor
%1033 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1034 = torch.aten.view %1031, %1033 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1035 = torch.aten.mm %1034, %1032 : !torch.tensor, !torch.tensor -> !torch.tensor
%1036 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1037 = torch.operator "aten._unsafe_view"(%1035, %1036) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1038 = torch.aten.add_.Tensor %1037, %arg183, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1039 = torch.aten.empty_like %1038, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1040 = torch.operator "aten.bernoulli_.float"(%1039, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1041 = torch.aten.div_.Scalar %1040, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1042 = torch.aten.mul.Tensor %1038, %1041 : !torch.tensor, !torch.tensor -> !torch.tensor
%1043 = torch.aten.add.Tensor %1042, %result0_51, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1044 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_54, %result1_55, %result2_56 = torch.aten.native_layer_norm %1043, %1044, %arg182, %arg181, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1045 = torch.aten.t %arg192 : !torch.tensor -> !torch.tensor
%1046 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1047 = torch.aten.view %result0_54, %1046 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1048 = torch.aten.mm %1047, %1045 : !torch.tensor, !torch.tensor -> !torch.tensor
%1049 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1050 = torch.operator "aten._unsafe_view"(%1048, %1049) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1051 = torch.aten.add_.Tensor %1050, %arg191, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1052 = torch.aten.gelu %1051 : !torch.tensor -> !torch.tensor
%1053 = torch.aten.t %arg196 : !torch.tensor -> !torch.tensor
%1054 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1055 = torch.aten.view %1052, %1054 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1056 = torch.aten.mm %1055, %1053 : !torch.tensor, !torch.tensor -> !torch.tensor
%1057 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1058 = torch.operator "aten._unsafe_view"(%1056, %1057) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1059 = torch.aten.add_.Tensor %1058, %arg195, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1060 = torch.aten.empty_like %1059, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1061 = torch.operator "aten.bernoulli_.float"(%1060, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1062 = torch.aten.div_.Scalar %1061, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1063 = torch.aten.mul.Tensor %1059, %1062 : !torch.tensor, !torch.tensor -> !torch.tensor
%1064 = torch.aten.add.Tensor %1063, %result0_54, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1065 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_57, %result1_58, %result2_59 = torch.aten.native_layer_norm %1064, %1065, %arg194, %arg193, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1066 = torch.aten.t %arg44 : !torch.tensor -> !torch.tensor
%1067 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1068 = torch.aten.view %result0_57, %1067 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1069 = torch.aten.mm %1068, %1066 : !torch.tensor, !torch.tensor -> !torch.tensor
%1070 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1071 = torch.operator "aten._unsafe_view"(%1069, %1070) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1072 = torch.aten.add_.Tensor %1071, %arg43, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1073 = torch.aten.t %arg42 : !torch.tensor -> !torch.tensor
%1074 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1075 = torch.aten.view %result0_57, %1074 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1076 = torch.aten.mm %1075, %1073 : !torch.tensor, !torch.tensor -> !torch.tensor
%1077 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1078 = torch.operator "aten._unsafe_view"(%1076, %1077) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1079 = torch.aten.add_.Tensor %1078, %arg41, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1080 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1081 = torch.aten.view %1079, %1080 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1082 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1083 = torch.aten.permute %1081, %1082 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1084 = torch.aten.t %arg46 : !torch.tensor -> !torch.tensor
%1085 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1086 = torch.aten.view %result0_57, %1085 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1087 = torch.aten.mm %1086, %1084 : !torch.tensor, !torch.tensor -> !torch.tensor
%1088 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1089 = torch.operator "aten._unsafe_view"(%1087, %1088) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1090 = torch.aten.add_.Tensor %1089, %arg45, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1091 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1092 = torch.aten.view %1090, %1091 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1093 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1094 = torch.aten.permute %1092, %1093 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1095 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1096 = torch.aten.view %1072, %1095 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1097 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1098 = torch.aten.permute %1096, %1097 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1099 = torch.aten.transpose.int %1083, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1100 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1101 = torch.aten.expand %1098, %1100, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1102 = torch.operator "aten.clone"(%1101, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1103 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1104 = torch.operator "aten._unsafe_view"(%1102, %1103) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1105 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1106 = torch.aten.expand %1099, %1105, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1107 = torch.operator "aten.clone"(%1106, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1108 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1109 = torch.operator "aten._unsafe_view"(%1107, %1108) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1110 = torch.aten.bmm %1104, %1109 : !torch.tensor, !torch.tensor -> !torch.tensor
%1111 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1112 = torch.operator "aten._unsafe_view"(%1110, %1111) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1113 = torch.aten.div.Tensor %1112, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1114 = torch.aten.add.Tensor %1113, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1115 = torch.aten._softmax %1114, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1116 = torch.aten.empty_like %1115, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1117 = torch.operator "aten.bernoulli_.float"(%1116, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1118 = torch.aten.div_.Scalar %1117, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1119 = torch.aten.mul.Tensor %1115, %1118 : !torch.tensor, !torch.tensor -> !torch.tensor
%1120 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1121 = torch.aten.expand %1119, %1120, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1122 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1123 = torch.aten.view %1121, %1122 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1124 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1125 = torch.aten.expand %1094, %1124, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1126 = torch.operator "aten.clone"(%1125, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1127 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1128 = torch.operator "aten._unsafe_view"(%1126, %1127) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1129 = torch.aten.bmm %1123, %1128 : !torch.tensor, !torch.tensor -> !torch.tensor
%1130 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1131 = torch.operator "aten._unsafe_view"(%1129, %1130) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1132 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1133 = torch.aten.permute %1131, %1132 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1134 = torch.operator "aten.clone"(%1133, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1135 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1136 = torch.aten.view %1134, %1135 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1137 = torch.aten.t %arg40 : !torch.tensor -> !torch.tensor
%1138 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1139 = torch.aten.view %1136, %1138 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1140 = torch.aten.mm %1139, %1137 : !torch.tensor, !torch.tensor -> !torch.tensor
%1141 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1142 = torch.operator "aten._unsafe_view"(%1140, %1141) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1143 = torch.aten.add_.Tensor %1142, %arg39, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1144 = torch.aten.empty_like %1143, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1145 = torch.operator "aten.bernoulli_.float"(%1144, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1146 = torch.aten.div_.Scalar %1145, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1147 = torch.aten.mul.Tensor %1143, %1146 : !torch.tensor, !torch.tensor -> !torch.tensor
%1148 = torch.aten.add.Tensor %1147, %result0_57, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1149 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_60, %result1_61, %result2_62 = torch.aten.native_layer_norm %1148, %1149, %arg38, %arg37, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1150 = torch.aten.t %arg48 : !torch.tensor -> !torch.tensor
%1151 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1152 = torch.aten.view %result0_60, %1151 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1153 = torch.aten.mm %1152, %1150 : !torch.tensor, !torch.tensor -> !torch.tensor
%1154 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1155 = torch.operator "aten._unsafe_view"(%1153, %1154) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1156 = torch.aten.add_.Tensor %1155, %arg47, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1157 = torch.aten.gelu %1156 : !torch.tensor -> !torch.tensor
%1158 = torch.aten.t %arg52 : !torch.tensor -> !torch.tensor
%1159 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1160 = torch.aten.view %1157, %1159 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1161 = torch.aten.mm %1160, %1158 : !torch.tensor, !torch.tensor -> !torch.tensor
%1162 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1163 = torch.operator "aten._unsafe_view"(%1161, %1162) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1164 = torch.aten.add_.Tensor %1163, %arg51, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1165 = torch.aten.empty_like %1164, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1166 = torch.operator "aten.bernoulli_.float"(%1165, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1167 = torch.aten.div_.Scalar %1166, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1168 = torch.aten.mul.Tensor %1164, %1167 : !torch.tensor, !torch.tensor -> !torch.tensor
%1169 = torch.aten.add.Tensor %1168, %result0_60, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1170 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_63, %result1_64, %result2_65 = torch.aten.native_layer_norm %1169, %1170, %arg50, %arg49, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1171 = torch.aten.t %arg60 : !torch.tensor -> !torch.tensor
%1172 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1173 = torch.aten.view %result0_63, %1172 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1174 = torch.aten.mm %1173, %1171 : !torch.tensor, !torch.tensor -> !torch.tensor
%1175 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1176 = torch.operator "aten._unsafe_view"(%1174, %1175) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1177 = torch.aten.add_.Tensor %1176, %arg59, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1178 = torch.aten.t %arg58 : !torch.tensor -> !torch.tensor
%1179 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1180 = torch.aten.view %result0_63, %1179 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1181 = torch.aten.mm %1180, %1178 : !torch.tensor, !torch.tensor -> !torch.tensor
%1182 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1183 = torch.operator "aten._unsafe_view"(%1181, %1182) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1184 = torch.aten.add_.Tensor %1183, %arg57, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1185 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1186 = torch.aten.view %1184, %1185 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1187 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1188 = torch.aten.permute %1186, %1187 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1189 = torch.aten.t %arg62 : !torch.tensor -> !torch.tensor
%1190 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1191 = torch.aten.view %result0_63, %1190 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1192 = torch.aten.mm %1191, %1189 : !torch.tensor, !torch.tensor -> !torch.tensor
%1193 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1194 = torch.operator "aten._unsafe_view"(%1192, %1193) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1195 = torch.aten.add_.Tensor %1194, %arg61, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1196 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1197 = torch.aten.view %1195, %1196 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1198 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1199 = torch.aten.permute %1197, %1198 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1200 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1201 = torch.aten.view %1177, %1200 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1202 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1203 = torch.aten.permute %1201, %1202 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1204 = torch.aten.transpose.int %1188, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1205 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1206 = torch.aten.expand %1203, %1205, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1207 = torch.operator "aten.clone"(%1206, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1208 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1209 = torch.operator "aten._unsafe_view"(%1207, %1208) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1210 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1211 = torch.aten.expand %1204, %1210, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1212 = torch.operator "aten.clone"(%1211, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1213 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1214 = torch.operator "aten._unsafe_view"(%1212, %1213) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1215 = torch.aten.bmm %1209, %1214 : !torch.tensor, !torch.tensor -> !torch.tensor
%1216 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1217 = torch.operator "aten._unsafe_view"(%1215, %1216) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1218 = torch.aten.div.Tensor %1217, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1219 = torch.aten.add.Tensor %1218, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1220 = torch.aten._softmax %1219, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1221 = torch.aten.empty_like %1220, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1222 = torch.operator "aten.bernoulli_.float"(%1221, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1223 = torch.aten.div_.Scalar %1222, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1224 = torch.aten.mul.Tensor %1220, %1223 : !torch.tensor, !torch.tensor -> !torch.tensor
%1225 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1226 = torch.aten.expand %1224, %1225, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1227 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1228 = torch.aten.view %1226, %1227 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1229 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1230 = torch.aten.expand %1199, %1229, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1231 = torch.operator "aten.clone"(%1230, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1232 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1233 = torch.operator "aten._unsafe_view"(%1231, %1232) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1234 = torch.aten.bmm %1228, %1233 : !torch.tensor, !torch.tensor -> !torch.tensor
%1235 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1236 = torch.operator "aten._unsafe_view"(%1234, %1235) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1237 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1238 = torch.aten.permute %1236, %1237 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1239 = torch.operator "aten.clone"(%1238, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1240 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1241 = torch.aten.view %1239, %1240 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1242 = torch.aten.t %arg56 : !torch.tensor -> !torch.tensor
%1243 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1244 = torch.aten.view %1241, %1243 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1245 = torch.aten.mm %1244, %1242 : !torch.tensor, !torch.tensor -> !torch.tensor
%1246 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1247 = torch.operator "aten._unsafe_view"(%1245, %1246) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1248 = torch.aten.add_.Tensor %1247, %arg55, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1249 = torch.aten.empty_like %1248, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1250 = torch.operator "aten.bernoulli_.float"(%1249, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1251 = torch.aten.div_.Scalar %1250, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1252 = torch.aten.mul.Tensor %1248, %1251 : !torch.tensor, !torch.tensor -> !torch.tensor
%1253 = torch.aten.add.Tensor %1252, %result0_63, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1254 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_66, %result1_67, %result2_68 = torch.aten.native_layer_norm %1253, %1254, %arg54, %arg53, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1255 = torch.aten.t %arg64 : !torch.tensor -> !torch.tensor
%1256 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1257 = torch.aten.view %result0_66, %1256 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1258 = torch.aten.mm %1257, %1255 : !torch.tensor, !torch.tensor -> !torch.tensor
%1259 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1260 = torch.operator "aten._unsafe_view"(%1258, %1259) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1261 = torch.aten.add_.Tensor %1260, %arg63, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1262 = torch.aten.gelu %1261 : !torch.tensor -> !torch.tensor
%1263 = torch.aten.t %arg68 : !torch.tensor -> !torch.tensor
%1264 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1265 = torch.aten.view %1262, %1264 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1266 = torch.aten.mm %1265, %1263 : !torch.tensor, !torch.tensor -> !torch.tensor
%1267 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1268 = torch.operator "aten._unsafe_view"(%1266, %1267) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1269 = torch.aten.add_.Tensor %1268, %arg67, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1270 = torch.aten.empty_like %1269, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1271 = torch.operator "aten.bernoulli_.float"(%1270, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1272 = torch.aten.div_.Scalar %1271, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1273 = torch.aten.mul.Tensor %1269, %1272 : !torch.tensor, !torch.tensor -> !torch.tensor
%1274 = torch.aten.add.Tensor %1273, %result0_66, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1275 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_69, %result1_70, %result2_71 = torch.aten.native_layer_norm %1274, %1275, %arg66, %arg65, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1276 = torch.aten.t %arg203 : !torch.tensor -> !torch.tensor
%1277 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1278 = torch.aten.view %result0_69, %1277 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1279 = torch.aten.mm %1278, %1276 : !torch.tensor, !torch.tensor -> !torch.tensor
%1280 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1281 = torch.operator "aten._unsafe_view"(%1279, %1280) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1282 = torch.aten.add_.Tensor %1281, %arg202, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1283 = torch.aten.gelu %1282 : !torch.tensor -> !torch.tensor
%1284 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_72, %result1_73, %result2_74 = torch.aten.native_layer_norm %1283, %1284, %arg201, %arg200, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1285 = torch.aten.t %arg199 : !torch.tensor -> !torch.tensor
%1286 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1287 = torch.aten.view %result0_72, %1286 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1288 = torch.aten.mm %1287, %1285 : !torch.tensor, !torch.tensor -> !torch.tensor
%1289 = torch.prim.ListConstruct %int4, %int512, %int30522 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1290 = torch.operator "aten._unsafe_view"(%1288, %1289) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1291 = torch.aten.add_.Tensor %1290, %arg198, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1292 = torch.prim.ListConstruct %int-1, %int30522 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1293 = torch.aten.view %1291, %1292 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1294 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<!torch.int>
%1295 = torch.aten.view %arg207, %1294 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1296 = torch.operator "aten._log_softmax"(%1293, %int1, %false) : (!torch.tensor, !torch.int, !torch.bool) -> !torch.tensor
%output, %total_weight = torch.aten.nll_loss_forward %1296, %1295, %none, %int1, %int-100 : !torch.tensor, !torch.tensor, !torch.none, !torch.int, !torch.int -> !torch.tensor, !torch.tensor
%1297 = torch.aten.transpose.int %1228, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1298 = torch.aten.transpose.int %1233, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1299 = torch.aten.transpose.int %1209, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1300 = torch.aten.transpose.int %1214, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1301 = torch.aten.transpose.int %1123, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1302 = torch.aten.transpose.int %1128, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1303 = torch.aten.transpose.int %1104, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1304 = torch.aten.transpose.int %1109, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1305 = torch.aten.transpose.int %1018, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1306 = torch.aten.transpose.int %1023, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1307 = torch.aten.transpose.int %999, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1308 = torch.aten.transpose.int %1004, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1309 = torch.aten.transpose.int %913, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1310 = torch.aten.transpose.int %918, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1311 = torch.aten.transpose.int %894, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1312 = torch.aten.transpose.int %899, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1313 = torch.aten.transpose.int %808, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1314 = torch.aten.transpose.int %813, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1315 = torch.aten.transpose.int %789, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1316 = torch.aten.transpose.int %794, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1317 = torch.aten.transpose.int %703, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1318 = torch.aten.transpose.int %708, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1319 = torch.aten.transpose.int %684, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1320 = torch.aten.transpose.int %689, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1321 = torch.aten.transpose.int %598, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1322 = torch.aten.transpose.int %603, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1323 = torch.aten.transpose.int %579, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1324 = torch.aten.transpose.int %584, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1325 = torch.aten.transpose.int %493, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1326 = torch.aten.transpose.int %498, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1327 = torch.aten.transpose.int %474, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1328 = torch.aten.transpose.int %479, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1329 = torch.aten.transpose.int %388, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1330 = torch.aten.transpose.int %393, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1331 = torch.aten.transpose.int %369, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1332 = torch.aten.transpose.int %374, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1333 = torch.aten.transpose.int %283, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1334 = torch.aten.transpose.int %288, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1335 = torch.aten.transpose.int %264, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1336 = torch.aten.transpose.int %269, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1337 = torch.aten.transpose.int %178, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1338 = torch.aten.transpose.int %183, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1339 = torch.aten.transpose.int %159, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1340 = torch.aten.transpose.int %164, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1341 = torch.aten.transpose.int %73, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1342 = torch.aten.transpose.int %78, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1343 = torch.aten.transpose.int %54, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1344 = torch.aten.transpose.int %59, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1345 = torch.prim.ListConstruct %output, %1291, %306, %result2_20, %arg98, %result2_50, %1013, %1274, %arg36, %result1_16, %824, %590, %1075, %arg120, %arg37, %445, %result2_5, %arg161, %arg26, %1337, %1244, %result2_11, %result1_7, %arg100, %1223, %arg76, %arg136, %123, %arg94, %1296, %509, %result1_10, %arg116, %320, %593, %732, %arg177, %arg16, %539, %arg133, %arg49, %963, %result1_52, %arg70, %719, %arg40, %arg33, %arg174, %result2_2, %arg60, %arg53, %4, %1115, %203, %arg118, %arg65, %arg200, %result1_37, %arg30, %arg90, %1321, %456, %1329, %result1_46, %1152, %1313, %arg0, %96, %1331, %arg112, %655, %result2_35, %arg66, %arg130, %arg38, %arg194, %arg166, %1220, %333, %25, %1251, %327, %arg149, %result1_34, %516, %1300, %627, %518, %852, %arg188, %result1_4, %result2_56, %1333, %1302, %arg158, %905, %arg44, %278, %438, %211, %result2_41, %1324, %854, %arg152, %1334, %858, %arg81, %876, %488, %arg28, %result2_44, %arg181, %117, %result1_22, %1086, %arg150, %result1_49, %380, %arg145, %1315, %arg140, %1307, %5, %result2_47, %arg192, %946, %1323, %arg92, %621, %arg21, %1325, %arg42, %arg186, %result2_53, %614, %1339, %215, %981, %result2_23, %result1_28, %14, %arg138, %arg193, %arg154, %arg203, %65, %result1_43, %arg46, %666, %1055, %1309, %arg146, %result1_73, %1156, %1328, %arg182, %arg180, %1318, %740, %1299, %383, %1327, %803, %arg104, %1322, %68, %194, %1051, %1265, %1139, %arg17, %result2_38, %222, %299, %arg106, %644, %207, %result1_70, %831, %432, %170, %arg85, %938, %957, %1343, %result1, %561, %1303, %arg74, %1068, %89, %485, %arg64, %arg190, %417, %1146, %312, %arg160, %arg124, %1338, %result1_13, %result2_68, %arg164, %arg18, %425, %1341, %970, %arg68, %result2_8, %arg101, %result2_17, %arg1, %942, %1278, %arg88, %275, %411, %434, %1312, %arg128, %1314, %arg80, %749, %arg62, %1010, %1160, %1272, %1173, %1298, %arg110, %result1_55, %543, %800, %arg184, %929, %arg178, %arg113, %arg48, %698, %1287, %result1_31, %1308, %1043, %1047, %1261, %1304, %18, 
%result2_26, %arg97, %1305, %arg14, %936, %695, %arg84, %228, %760, %173, %result2_71, %arg86, %arg134, %642, %908, %result2_65, %1335, %arg122, %arg54, %arg58, %total_weight, %771, %1332, %result1_67, %1317, %arg129, %1167, %arg132, %246, %753, %arg96, %result1_64, %result2, %1330, %result1_40, %841, %arg20, %result1_25, %1320, %329, %736, %result2_14, %result1_58, %result2_29, %1148, %98, %351, %arg10, %141, %1253, %308, %arg24, %550, %arg108, %1344, %1257, %1342, %arg69, %arg201, %arg206, %413, %arg126, %833, %arg32, %arg56, %845, %623, %537, %arg142, %result1_19, %1301, %result1_1, %201, %1310, %1340, %1326, %36, %arg78, %arg196, %1336, %10, %result2_32, %1311, %648, %421, %235, %726, %arg148, %arg199, %arg176, %arg168, %1062, %1295, %1319, %arg6, %arg162, %950, %1296, %728, %959, %result2_62, %340, %1118, %result2_74, %arg117, %1282, %119, %arg114, %1180, %arg12, %106, %arg72, %1297, %arg52, %1169, %result2_59, %result1_61, %arg50, %arg172, %arg8, %631, %865, %arg144, %130, %arg34, %1064, %635, %747, %404, %1041, %arg82, %1191, %526, %102, %837, %224, %arg170, %1034, %530, %arg165, %arg22, %arg102, %arg5, %110, %1316, %1306, %arg156, %316, %522 : (!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor) -> !torch.list<!torch.tensor>
return %1345 : !torch.list<!torch.tensor>
}
}
// -----// IR Dump After SymbolDCE //----- //
module attributes {torch.debug_module_name = "GraphModule"} {
func @forward(%arg0: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg1: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg2: !torch.tensor {torch.type_bound = !torch.vtensor<[512,768],f32>}, %arg3: !torch.tensor {torch.type_bound = !torch.vtensor<[2,768],f32>}, %arg4: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg5: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg6: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg7: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg8: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg9: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg10: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg11: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg12: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg13: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg14: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg15: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg16: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg17: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg18: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg19: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg20: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg21: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg22: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg23: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg24: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg25: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg26: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg27: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768],f32>}, %arg28: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg29: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg30: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg31: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg32: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg33: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg34: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg35: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg36: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg37: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg38: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg39: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg40: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg41: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg42: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg43: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg44: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg45: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg46: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg47: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg48: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg49: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg50: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg51: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg52: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg53: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg54: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg55: !torch.tensor 
{torch.type_bound = !torch.vtensor<[768],f32>}, %arg56: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg57: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg58: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg59: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg60: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg61: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg62: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg63: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg64: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg65: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg66: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg67: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg68: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg69: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg70: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg71: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg72: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg73: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg74: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg75: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg76: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg77: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg78: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg79: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg80: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg81: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg82: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg83: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg84: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg85: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg86: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg87: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg88: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg89: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg90: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg91: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg92: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg93: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg94: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg95: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg96: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg97: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg98: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg99: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg100: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg101: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg102: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg103: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg104: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg105: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg106: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg107: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg108: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg109: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg110: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768,768],f32>}, %arg111: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg112: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg113: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg114: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg115: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg116: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg117: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg118: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg119: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg120: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg121: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg122: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg123: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg124: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg125: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg126: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg127: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg128: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg129: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg130: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg131: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg132: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg133: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg134: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg135: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg136: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg137: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg138: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg139: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg140: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg141: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg142: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg143: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg144: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg145: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg146: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg147: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg148: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg149: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg150: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg151: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg152: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg153: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg154: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg155: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg156: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg157: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg158: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg159: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg160: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg161: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg162: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg163: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg164: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg165: !torch.tensor {torch.type_bound 
= !torch.vtensor<[768],f32>}, %arg166: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg167: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg168: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg169: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg170: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg171: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg172: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg173: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg174: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg175: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg176: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg177: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg178: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg179: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg180: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg181: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg182: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg183: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg184: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg185: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg186: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg187: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg188: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg189: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg190: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg191: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg192: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, 
%arg193: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg194: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg195: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg196: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg197: !torch.tensor {torch.type_bound = !torch.vtensor<[30522],f32>}, %arg198: !torch.tensor {torch.type_bound = !torch.vtensor<[30522],f32>}, %arg199: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg200: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg201: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg202: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg203: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg204: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg205: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg206: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}, %arg207: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}) -> !torch.list<!torch.tensor> {
%none = torch.constant.none
%false = torch.constant.bool false
%int0 = torch.constant.int 0
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int4 = torch.constant.int 4
%int512 = torch.constant.int 512
%int768 = torch.constant.int 768
%float9.999990e-13 = torch.constant.float 9.9999999999999998E-13
%float9.000000e-01 = torch.constant.float 9.000000e-01
%int2048 = torch.constant.int 2048
%int12 = torch.constant.int 12
%int64 = torch.constant.int 64
%int2 = torch.constant.int 2
%int1 = torch.constant.int 1
%int3 = torch.constant.int 3
%int-1 = torch.constant.int -1
%int-2 = torch.constant.int -2
%int48 = torch.constant.int 48
%int3072 = torch.constant.int 3072
%int30522 = torch.constant.int 30522
%int-100 = torch.constant.int -100
%0 = torch.tensor.literal(dense<-0.000000e+00> : tensor<4x1x1x512xf32>) : !torch.tensor<[4,1,1,512],f32>
%1 = torch.tensor.literal(dense<8.000000e+00> : tensor<f64>) : !torch.tensor<[],f64>
%2 = torch.aten.slice.Tensor %arg205, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%3 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%4 = torch.aten.expand %2, %3, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%5 = torch.aten.slice.Tensor %arg204, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%6 = torch.aten.embedding %arg4, %arg206, %int0, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%7 = torch.aten.embedding %arg3, %4, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%8 = torch.aten.add.Tensor %6, %7, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%9 = torch.aten.embedding %arg2, %5, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%10 = torch.aten.add_.Tensor %8, %9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%11 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0, %result1, %result2 = torch.aten.native_layer_norm %10, %11, %arg1, %arg0, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%12 = torch.aten.empty_like %result0, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%13 = torch.operator "aten.bernoulli_.float"(%12, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%14 = torch.aten.div_.Scalar %13, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%15 = torch.aten.mul.Tensor %result0, %14 : !torch.tensor, !torch.tensor -> !torch.tensor
%16 = torch.aten.t %arg12 : !torch.tensor -> !torch.tensor
%17 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%18 = torch.aten.view %15, %17 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%19 = torch.aten.mm %18, %16 : !torch.tensor, !torch.tensor -> !torch.tensor
%20 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%21 = torch.operator "aten._unsafe_view"(%19, %20) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%22 = torch.aten.add_.Tensor %21, %arg11, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%23 = torch.aten.t %arg10 : !torch.tensor -> !torch.tensor
%24 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%25 = torch.aten.view %15, %24 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%26 = torch.aten.mm %25, %23 : !torch.tensor, !torch.tensor -> !torch.tensor
%27 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%28 = torch.operator "aten._unsafe_view"(%26, %27) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%29 = torch.aten.add_.Tensor %28, %arg9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%30 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%31 = torch.aten.view %29, %30 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%32 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%33 = torch.aten.permute %31, %32 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%34 = torch.aten.t %arg14 : !torch.tensor -> !torch.tensor
%35 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%36 = torch.aten.view %15, %35 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%37 = torch.aten.mm %36, %34 : !torch.tensor, !torch.tensor -> !torch.tensor
%38 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%39 = torch.operator "aten._unsafe_view"(%37, %38) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%40 = torch.aten.add_.Tensor %39, %arg13, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%41 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%42 = torch.aten.view %40, %41 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%43 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%44 = torch.aten.permute %42, %43 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%45 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%46 = torch.aten.view %22, %45 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%47 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%48 = torch.aten.permute %46, %47 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%49 = torch.aten.transpose.int %33, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%50 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%51 = torch.aten.expand %48, %50, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%52 = torch.operator "aten.clone"(%51, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%53 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%54 = torch.operator "aten._unsafe_view"(%52, %53) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%55 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%56 = torch.aten.expand %49, %55, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%57 = torch.operator "aten.clone"(%56, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%58 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%59 = torch.operator "aten._unsafe_view"(%57, %58) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%60 = torch.aten.bmm %54, %59 : !torch.tensor, !torch.tensor -> !torch.tensor
%61 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%62 = torch.operator "aten._unsafe_view"(%60, %61) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%63 = torch.aten.div.Tensor %62, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%64 = torch.aten.add.Tensor %63, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%65 = torch.aten._softmax %64, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%66 = torch.aten.empty_like %65, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%67 = torch.operator "aten.bernoulli_.float"(%66, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%68 = torch.aten.div_.Scalar %67, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%69 = torch.aten.mul.Tensor %65, %68 : !torch.tensor, !torch.tensor -> !torch.tensor
%70 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%71 = torch.aten.expand %69, %70, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%72 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%73 = torch.aten.view %71, %72 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%74 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%75 = torch.aten.expand %44, %74, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%76 = torch.operator "aten.clone"(%75, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%77 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%78 = torch.operator "aten._unsafe_view"(%76, %77) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%79 = torch.aten.bmm %73, %78 : !torch.tensor, !torch.tensor -> !torch.tensor
%80 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%81 = torch.operator "aten._unsafe_view"(%79, %80) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%82 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%83 = torch.aten.permute %81, %82 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%84 = torch.operator "aten.clone"(%83, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%85 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%86 = torch.aten.view %84, %85 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%87 = torch.aten.t %arg8 : !torch.tensor -> !torch.tensor
%88 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%89 = torch.aten.view %86, %88 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%90 = torch.aten.mm %89, %87 : !torch.tensor, !torch.tensor -> !torch.tensor
%91 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%92 = torch.operator "aten._unsafe_view"(%90, %91) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%93 = torch.aten.add_.Tensor %92, %arg7, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%94 = torch.aten.empty_like %93, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%95 = torch.operator "aten.bernoulli_.float"(%94, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%96 = torch.aten.div_.Scalar %95, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%97 = torch.aten.mul.Tensor %93, %96 : !torch.tensor, !torch.tensor -> !torch.tensor
%98 = torch.aten.add.Tensor %97, %15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%99 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_0, %result1_1, %result2_2 = torch.aten.native_layer_norm %98, %99, %arg6, %arg5, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%100 = torch.aten.t %arg16 : !torch.tensor -> !torch.tensor
%101 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%102 = torch.aten.view %result0_0, %101 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%103 = torch.aten.mm %102, %100 : !torch.tensor, !torch.tensor -> !torch.tensor
%104 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%105 = torch.operator "aten._unsafe_view"(%103, %104) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%106 = torch.aten.add_.Tensor %105, %arg15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%107 = torch.aten.gelu %106 : !torch.tensor -> !torch.tensor
%108 = torch.aten.t %arg20 : !torch.tensor -> !torch.tensor
%109 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%110 = torch.aten.view %107, %109 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%111 = torch.aten.mm %110, %108 : !torch.tensor, !torch.tensor -> !torch.tensor
%112 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%113 = torch.operator "aten._unsafe_view"(%111, %112) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%114 = torch.aten.add_.Tensor %113, %arg19, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%115 = torch.aten.empty_like %114, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%116 = torch.operator "aten.bernoulli_.float"(%115, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%117 = torch.aten.div_.Scalar %116, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%118 = torch.aten.mul.Tensor %114, %117 : !torch.tensor, !torch.tensor -> !torch.tensor
%119 = torch.aten.add.Tensor %118, %result0_0, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%120 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_3, %result1_4, %result2_5 = torch.aten.native_layer_norm %119, %120, %arg18, %arg17, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%121 = torch.aten.t %arg28 : !torch.tensor -> !torch.tensor
%122 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%123 = torch.aten.view %result0_3, %122 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%124 = torch.aten.mm %123, %121 : !torch.tensor, !torch.tensor -> !torch.tensor
%125 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%126 = torch.operator "aten._unsafe_view"(%124, %125) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%127 = torch.aten.add_.Tensor %126, %arg27, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%128 = torch.aten.t %arg26 : !torch.tensor -> !torch.tensor
%129 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%130 = torch.aten.view %result0_3, %129 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%131 = torch.aten.mm %130, %128 : !torch.tensor, !torch.tensor -> !torch.tensor
%132 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%133 = torch.operator "aten._unsafe_view"(%131, %132) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%134 = torch.aten.add_.Tensor %133, %arg25, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%135 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%136 = torch.aten.view %134, %135 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%137 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%138 = torch.aten.permute %136, %137 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%139 = torch.aten.t %arg30 : !torch.tensor -> !torch.tensor
%140 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%141 = torch.aten.view %result0_3, %140 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%142 = torch.aten.mm %141, %139 : !torch.tensor, !torch.tensor -> !torch.tensor
%143 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%144 = torch.operator "aten._unsafe_view"(%142, %143) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%145 = torch.aten.add_.Tensor %144, %arg29, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%146 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%147 = torch.aten.view %145, %146 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%148 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%149 = torch.aten.permute %147, %148 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%150 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%151 = torch.aten.view %127, %150 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%152 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%153 = torch.aten.permute %151, %152 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%154 = torch.aten.transpose.int %138, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%155 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%156 = torch.aten.expand %153, %155, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%157 = torch.operator "aten.clone"(%156, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%158 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%159 = torch.operator "aten._unsafe_view"(%157, %158) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%160 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%161 = torch.aten.expand %154, %160, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%162 = torch.operator "aten.clone"(%161, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%163 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%164 = torch.operator "aten._unsafe_view"(%162, %163) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%165 = torch.aten.bmm %159, %164 : !torch.tensor, !torch.tensor -> !torch.tensor
%166 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%167 = torch.operator "aten._unsafe_view"(%165, %166) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%168 = torch.aten.div.Tensor %167, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%169 = torch.aten.add.Tensor %168, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%170 = torch.aten._softmax %169, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%171 = torch.aten.empty_like %170, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%172 = torch.operator "aten.bernoulli_.float"(%171, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%173 = torch.aten.div_.Scalar %172, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%174 = torch.aten.mul.Tensor %170, %173 : !torch.tensor, !torch.tensor -> !torch.tensor
%175 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%176 = torch.aten.expand %174, %175, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%177 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%178 = torch.aten.view %176, %177 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%179 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%180 = torch.aten.expand %149, %179, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%181 = torch.operator "aten.clone"(%180, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%182 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%183 = torch.operator "aten._unsafe_view"(%181, %182) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%184 = torch.aten.bmm %178, %183 : !torch.tensor, !torch.tensor -> !torch.tensor
%185 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%186 = torch.operator "aten._unsafe_view"(%184, %185) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%187 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%188 = torch.aten.permute %186, %187 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%189 = torch.operator "aten.clone"(%188, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%190 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%191 = torch.aten.view %189, %190 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%192 = torch.aten.t %arg24 : !torch.tensor -> !torch.tensor
%193 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%194 = torch.aten.view %191, %193 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%195 = torch.aten.mm %194, %192 : !torch.tensor, !torch.tensor -> !torch.tensor
%196 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%197 = torch.operator "aten._unsafe_view"(%195, %196) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%198 = torch.aten.add_.Tensor %197, %arg23, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%199 = torch.aten.empty_like %198, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%200 = torch.operator "aten.bernoulli_.float"(%199, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%201 = torch.aten.div_.Scalar %200, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%202 = torch.aten.mul.Tensor %198, %201 : !torch.tensor, !torch.tensor -> !torch.tensor
%203 = torch.aten.add.Tensor %202, %result0_3, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%204 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_6, %result1_7, %result2_8 = torch.aten.native_layer_norm %203, %204, %arg22, %arg21, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%205 = torch.aten.t %arg32 : !torch.tensor -> !torch.tensor
%206 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%207 = torch.aten.view %result0_6, %206 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%208 = torch.aten.mm %207, %205 : !torch.tensor, !torch.tensor -> !torch.tensor
%209 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%210 = torch.operator "aten._unsafe_view"(%208, %209) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%211 = torch.aten.add_.Tensor %210, %arg31, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%212 = torch.aten.gelu %211 : !torch.tensor -> !torch.tensor
%213 = torch.aten.t %arg36 : !torch.tensor -> !torch.tensor
%214 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%215 = torch.aten.view %212, %214 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%216 = torch.aten.mm %215, %213 : !torch.tensor, !torch.tensor -> !torch.tensor
%217 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%218 = torch.operator "aten._unsafe_view"(%216, %217) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%219 = torch.aten.add_.Tensor %218, %arg35, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%220 = torch.aten.empty_like %219, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%221 = torch.operator "aten.bernoulli_.float"(%220, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%222 = torch.aten.div_.Scalar %221, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%223 = torch.aten.mul.Tensor %219, %222 : !torch.tensor, !torch.tensor -> !torch.tensor
%224 = torch.aten.add.Tensor %223, %result0_6, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%225 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_9, %result1_10, %result2_11 = torch.aten.native_layer_norm %224, %225, %arg34, %arg33, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%226 = torch.aten.t %arg76 : !torch.tensor -> !torch.tensor
%227 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%228 = torch.aten.view %result0_9, %227 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%229 = torch.aten.mm %228, %226 : !torch.tensor, !torch.tensor -> !torch.tensor
%230 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%231 = torch.operator "aten._unsafe_view"(%229, %230) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%232 = torch.aten.add_.Tensor %231, %arg75, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%233 = torch.aten.t %arg74 : !torch.tensor -> !torch.tensor
%234 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%235 = torch.aten.view %result0_9, %234 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%236 = torch.aten.mm %235, %233 : !torch.tensor, !torch.tensor -> !torch.tensor
%237 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%238 = torch.operator "aten._unsafe_view"(%236, %237) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%239 = torch.aten.add_.Tensor %238, %arg73, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%240 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%241 = torch.aten.view %239, %240 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%242 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%243 = torch.aten.permute %241, %242 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%244 = torch.aten.t %arg78 : !torch.tensor -> !torch.tensor
%245 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%246 = torch.aten.view %result0_9, %245 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%247 = torch.aten.mm %246, %244 : !torch.tensor, !torch.tensor -> !torch.tensor
%248 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%249 = torch.operator "aten._unsafe_view"(%247, %248) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%250 = torch.aten.add_.Tensor %249, %arg77, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%251 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%252 = torch.aten.view %250, %251 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%253 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%254 = torch.aten.permute %252, %253 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%255 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%256 = torch.aten.view %232, %255 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%257 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%258 = torch.aten.permute %256, %257 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%259 = torch.aten.transpose.int %243, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%260 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%261 = torch.aten.expand %258, %260, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%262 = torch.operator "aten.clone"(%261, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%263 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%264 = torch.operator "aten._unsafe_view"(%262, %263) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%265 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%266 = torch.aten.expand %259, %265, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%267 = torch.operator "aten.clone"(%266, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%268 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%269 = torch.operator "aten._unsafe_view"(%267, %268) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%270 = torch.aten.bmm %264, %269 : !torch.tensor, !torch.tensor -> !torch.tensor
%271 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%272 = torch.operator "aten._unsafe_view"(%270, %271) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%273 = torch.aten.div.Tensor %272, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%274 = torch.aten.add.Tensor %273, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%275 = torch.aten._softmax %274, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%276 = torch.aten.empty_like %275, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%277 = torch.operator "aten.bernoulli_.float"(%276, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%278 = torch.aten.div_.Scalar %277, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%279 = torch.aten.mul.Tensor %275, %278 : !torch.tensor, !torch.tensor -> !torch.tensor
%280 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%281 = torch.aten.expand %279, %280, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%282 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%283 = torch.aten.view %281, %282 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%284 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%285 = torch.aten.expand %254, %284, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%286 = torch.operator "aten.clone"(%285, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%287 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%288 = torch.operator "aten._unsafe_view"(%286, %287) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%289 = torch.aten.bmm %283, %288 : !torch.tensor, !torch.tensor -> !torch.tensor
%290 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%291 = torch.operator "aten._unsafe_view"(%289, %290) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%292 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%293 = torch.aten.permute %291, %292 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%294 = torch.operator "aten.clone"(%293, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%295 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%296 = torch.aten.view %294, %295 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%297 = torch.aten.t %arg72 : !torch.tensor -> !torch.tensor
%298 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%299 = torch.aten.view %296, %298 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%300 = torch.aten.mm %299, %297 : !torch.tensor, !torch.tensor -> !torch.tensor
%301 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%302 = torch.operator "aten._unsafe_view"(%300, %301) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%303 = torch.aten.add_.Tensor %302, %arg71, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%304 = torch.aten.empty_like %303, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%305 = torch.operator "aten.bernoulli_.float"(%304, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%306 = torch.aten.div_.Scalar %305, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%307 = torch.aten.mul.Tensor %303, %306 : !torch.tensor, !torch.tensor -> !torch.tensor
%308 = torch.aten.add.Tensor %307, %result0_9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%309 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_12, %result1_13, %result2_14 = torch.aten.native_layer_norm %308, %309, %arg70, %arg69, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%310 = torch.aten.t %arg80 : !torch.tensor -> !torch.tensor
%311 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%312 = torch.aten.view %result0_12, %311 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%313 = torch.aten.mm %312, %310 : !torch.tensor, !torch.tensor -> !torch.tensor
%314 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%315 = torch.operator "aten._unsafe_view"(%313, %314) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%316 = torch.aten.add_.Tensor %315, %arg79, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%317 = torch.aten.gelu %316 : !torch.tensor -> !torch.tensor
%318 = torch.aten.t %arg84 : !torch.tensor -> !torch.tensor
%319 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%320 = torch.aten.view %317, %319 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%321 = torch.aten.mm %320, %318 : !torch.tensor, !torch.tensor -> !torch.tensor
%322 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%323 = torch.operator "aten._unsafe_view"(%321, %322) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%324 = torch.aten.add_.Tensor %323, %arg83, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%325 = torch.aten.empty_like %324, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%326 = torch.operator "aten.bernoulli_.float"(%325, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%327 = torch.aten.div_.Scalar %326, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%328 = torch.aten.mul.Tensor %324, %327 : !torch.tensor, !torch.tensor -> !torch.tensor
%329 = torch.aten.add.Tensor %328, %result0_12, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%330 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_15, %result1_16, %result2_17 = torch.aten.native_layer_norm %329, %330, %arg82, %arg81, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%331 = torch.aten.t %arg92 : !torch.tensor -> !torch.tensor
%332 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%333 = torch.aten.view %result0_15, %332 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%334 = torch.aten.mm %333, %331 : !torch.tensor, !torch.tensor -> !torch.tensor
%335 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%336 = torch.operator "aten._unsafe_view"(%334, %335) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%337 = torch.aten.add_.Tensor %336, %arg91, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%338 = torch.aten.t %arg90 : !torch.tensor -> !torch.tensor
%339 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%340 = torch.aten.view %result0_15, %339 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%341 = torch.aten.mm %340, %338 : !torch.tensor, !torch.tensor -> !torch.tensor
%342 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%343 = torch.operator "aten._unsafe_view"(%341, %342) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%344 = torch.aten.add_.Tensor %343, %arg89, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%345 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%346 = torch.aten.view %344, %345 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%347 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%348 = torch.aten.permute %346, %347 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%349 = torch.aten.t %arg94 : !torch.tensor -> !torch.tensor
%350 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%351 = torch.aten.view %result0_15, %350 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%352 = torch.aten.mm %351, %349 : !torch.tensor, !torch.tensor -> !torch.tensor
%353 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%354 = torch.operator "aten._unsafe_view"(%352, %353) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%355 = torch.aten.add_.Tensor %354, %arg93, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%356 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%357 = torch.aten.view %355, %356 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%358 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%359 = torch.aten.permute %357, %358 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%360 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%361 = torch.aten.view %337, %360 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%362 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%363 = torch.aten.permute %361, %362 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%364 = torch.aten.transpose.int %348, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%365 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%366 = torch.aten.expand %363, %365, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%367 = torch.operator "aten.clone"(%366, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%368 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%369 = torch.operator "aten._unsafe_view"(%367, %368) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%370 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%371 = torch.aten.expand %364, %370, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%372 = torch.operator "aten.clone"(%371, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%373 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%374 = torch.operator "aten._unsafe_view"(%372, %373) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%375 = torch.aten.bmm %369, %374 : !torch.tensor, !torch.tensor -> !torch.tensor
%376 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%377 = torch.operator "aten._unsafe_view"(%375, %376) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%378 = torch.aten.div.Tensor %377, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%379 = torch.aten.add.Tensor %378, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%380 = torch.aten._softmax %379, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%381 = torch.aten.empty_like %380, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%382 = torch.operator "aten.bernoulli_.float"(%381, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%383 = torch.aten.div_.Scalar %382, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%384 = torch.aten.mul.Tensor %380, %383 : !torch.tensor, !torch.tensor -> !torch.tensor
%385 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%386 = torch.aten.expand %384, %385, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%387 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%388 = torch.aten.view %386, %387 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%389 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%390 = torch.aten.expand %359, %389, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%391 = torch.operator "aten.clone"(%390, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%392 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%393 = torch.operator "aten._unsafe_view"(%391, %392) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%394 = torch.aten.bmm %388, %393 : !torch.tensor, !torch.tensor -> !torch.tensor
%395 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%396 = torch.operator "aten._unsafe_view"(%394, %395) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%397 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%398 = torch.aten.permute %396, %397 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%399 = torch.operator "aten.clone"(%398, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%400 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%401 = torch.aten.view %399, %400 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%402 = torch.aten.t %arg88 : !torch.tensor -> !torch.tensor
%403 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%404 = torch.aten.view %401, %403 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%405 = torch.aten.mm %404, %402 : !torch.tensor, !torch.tensor -> !torch.tensor
%406 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%407 = torch.operator "aten._unsafe_view"(%405, %406) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%408 = torch.aten.add_.Tensor %407, %arg87, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%409 = torch.aten.empty_like %408, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%410 = torch.operator "aten.bernoulli_.float"(%409, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%411 = torch.aten.div_.Scalar %410, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%412 = torch.aten.mul.Tensor %408, %411 : !torch.tensor, !torch.tensor -> !torch.tensor
%413 = torch.aten.add.Tensor %412, %result0_15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%414 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_18, %result1_19, %result2_20 = torch.aten.native_layer_norm %413, %414, %arg86, %arg85, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%415 = torch.aten.t %arg96 : !torch.tensor -> !torch.tensor
%416 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%417 = torch.aten.view %result0_18, %416 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%418 = torch.aten.mm %417, %415 : !torch.tensor, !torch.tensor -> !torch.tensor
%419 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%420 = torch.operator "aten._unsafe_view"(%418, %419) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%421 = torch.aten.add_.Tensor %420, %arg95, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%422 = torch.aten.gelu %421 : !torch.tensor -> !torch.tensor
%423 = torch.aten.t %arg100 : !torch.tensor -> !torch.tensor
%424 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%425 = torch.aten.view %422, %424 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%426 = torch.aten.mm %425, %423 : !torch.tensor, !torch.tensor -> !torch.tensor
%427 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%428 = torch.operator "aten._unsafe_view"(%426, %427) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%429 = torch.aten.add_.Tensor %428, %arg99, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%430 = torch.aten.empty_like %429, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%431 = torch.operator "aten.bernoulli_.float"(%430, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%432 = torch.aten.div_.Scalar %431, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%433 = torch.aten.mul.Tensor %429, %432 : !torch.tensor, !torch.tensor -> !torch.tensor
%434 = torch.aten.add.Tensor %433, %result0_18, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%435 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_21, %result1_22, %result2_23 = torch.aten.native_layer_norm %434, %435, %arg98, %arg97, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%436 = torch.aten.t %arg108 : !torch.tensor -> !torch.tensor
%437 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%438 = torch.aten.view %result0_21, %437 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%439 = torch.aten.mm %438, %436 : !torch.tensor, !torch.tensor -> !torch.tensor
%440 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%441 = torch.operator "aten._unsafe_view"(%439, %440) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%442 = torch.aten.add_.Tensor %441, %arg107, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%443 = torch.aten.t %arg106 : !torch.tensor -> !torch.tensor
%444 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%445 = torch.aten.view %result0_21, %444 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%446 = torch.aten.mm %445, %443 : !torch.tensor, !torch.tensor -> !torch.tensor
%447 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%448 = torch.operator "aten._unsafe_view"(%446, %447) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%449 = torch.aten.add_.Tensor %448, %arg105, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%450 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%451 = torch.aten.view %449, %450 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%452 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%453 = torch.aten.permute %451, %452 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%454 = torch.aten.t %arg110 : !torch.tensor -> !torch.tensor
%455 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%456 = torch.aten.view %result0_21, %455 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%457 = torch.aten.mm %456, %454 : !torch.tensor, !torch.tensor -> !torch.tensor
%458 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%459 = torch.operator "aten._unsafe_view"(%457, %458) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%460 = torch.aten.add_.Tensor %459, %arg109, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%461 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%462 = torch.aten.view %460, %461 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%463 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%464 = torch.aten.permute %462, %463 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%465 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%466 = torch.aten.view %442, %465 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%467 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%468 = torch.aten.permute %466, %467 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%469 = torch.aten.transpose.int %453, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%470 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%471 = torch.aten.expand %468, %470, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%472 = torch.operator "aten.clone"(%471, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%473 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%474 = torch.operator "aten._unsafe_view"(%472, %473) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%475 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%476 = torch.aten.expand %469, %475, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%477 = torch.operator "aten.clone"(%476, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%478 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%479 = torch.operator "aten._unsafe_view"(%477, %478) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%480 = torch.aten.bmm %474, %479 : !torch.tensor, !torch.tensor -> !torch.tensor
%481 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%482 = torch.operator "aten._unsafe_view"(%480, %481) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%483 = torch.aten.div.Tensor %482, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%484 = torch.aten.add.Tensor %483, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%485 = torch.aten._softmax %484, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%486 = torch.aten.empty_like %485, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%487 = torch.operator "aten.bernoulli_.float"(%486, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%488 = torch.aten.div_.Scalar %487, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%489 = torch.aten.mul.Tensor %485, %488 : !torch.tensor, !torch.tensor -> !torch.tensor
%490 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%491 = torch.aten.expand %489, %490, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%492 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%493 = torch.aten.view %491, %492 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%494 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%495 = torch.aten.expand %464, %494, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%496 = torch.operator "aten.clone"(%495, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%497 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%498 = torch.operator "aten._unsafe_view"(%496, %497) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%499 = torch.aten.bmm %493, %498 : !torch.tensor, !torch.tensor -> !torch.tensor
%500 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%501 = torch.operator "aten._unsafe_view"(%499, %500) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%502 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%503 = torch.aten.permute %501, %502 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%504 = torch.operator "aten.clone"(%503, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%505 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%506 = torch.aten.view %504, %505 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%507 = torch.aten.t %arg104 : !torch.tensor -> !torch.tensor
%508 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%509 = torch.aten.view %506, %508 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%510 = torch.aten.mm %509, %507 : !torch.tensor, !torch.tensor -> !torch.tensor
%511 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%512 = torch.operator "aten._unsafe_view"(%510, %511) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%513 = torch.aten.add_.Tensor %512, %arg103, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%514 = torch.aten.empty_like %513, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%515 = torch.operator "aten.bernoulli_.float"(%514, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%516 = torch.aten.div_.Scalar %515, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%517 = torch.aten.mul.Tensor %513, %516 : !torch.tensor, !torch.tensor -> !torch.tensor
%518 = torch.aten.add.Tensor %517, %result0_21, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%519 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %518, %519, %arg102, %arg101, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%520 = torch.aten.t %arg112 : !torch.tensor -> !torch.tensor
%521 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%522 = torch.aten.view %result0_24, %521 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%523 = torch.aten.mm %522, %520 : !torch.tensor, !torch.tensor -> !torch.tensor
%524 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%525 = torch.operator "aten._unsafe_view"(%523, %524) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%526 = torch.aten.add_.Tensor %525, %arg111, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%527 = torch.aten.gelu %526 : !torch.tensor -> !torch.tensor
%528 = torch.aten.t %arg116 : !torch.tensor -> !torch.tensor
%529 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%530 = torch.aten.view %527, %529 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%531 = torch.aten.mm %530, %528 : !torch.tensor, !torch.tensor -> !torch.tensor
%532 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%533 = torch.operator "aten._unsafe_view"(%531, %532) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%534 = torch.aten.add_.Tensor %533, %arg115, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%535 = torch.aten.empty_like %534, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%536 = torch.operator "aten.bernoulli_.float"(%535, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%537 = torch.aten.div_.Scalar %536, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%538 = torch.aten.mul.Tensor %534, %537 : !torch.tensor, !torch.tensor -> !torch.tensor
%539 = torch.aten.add.Tensor %538, %result0_24, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%540 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_27, %result1_28, %result2_29 = torch.aten.native_layer_norm %539, %540, %arg114, %arg113, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%541 = torch.aten.t %arg124 : !torch.tensor -> !torch.tensor
%542 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%543 = torch.aten.view %result0_27, %542 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%544 = torch.aten.mm %543, %541 : !torch.tensor, !torch.tensor -> !torch.tensor
%545 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%546 = torch.operator "aten._unsafe_view"(%544, %545) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%547 = torch.aten.add_.Tensor %546, %arg123, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%548 = torch.aten.t %arg122 : !torch.tensor -> !torch.tensor
%549 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%550 = torch.aten.view %result0_27, %549 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%551 = torch.aten.mm %550, %548 : !torch.tensor, !torch.tensor -> !torch.tensor
%552 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%553 = torch.operator "aten._unsafe_view"(%551, %552) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%554 = torch.aten.add_.Tensor %553, %arg121, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%555 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%556 = torch.aten.view %554, %555 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%557 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%558 = torch.aten.permute %556, %557 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%559 = torch.aten.t %arg126 : !torch.tensor -> !torch.tensor
%560 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%561 = torch.aten.view %result0_27, %560 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%562 = torch.aten.mm %561, %559 : !torch.tensor, !torch.tensor -> !torch.tensor
%563 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%564 = torch.operator "aten._unsafe_view"(%562, %563) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%565 = torch.aten.add_.Tensor %564, %arg125, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%566 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%567 = torch.aten.view %565, %566 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%568 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%569 = torch.aten.permute %567, %568 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%570 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%571 = torch.aten.view %547, %570 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%572 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%573 = torch.aten.permute %571, %572 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%574 = torch.aten.transpose.int %558, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%575 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%576 = torch.aten.expand %573, %575, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%577 = torch.operator "aten.clone"(%576, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%578 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%579 = torch.operator "aten._unsafe_view"(%577, %578) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%580 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%581 = torch.aten.expand %574, %580, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%582 = torch.operator "aten.clone"(%581, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%583 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%584 = torch.operator "aten._unsafe_view"(%582, %583) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%585 = torch.aten.bmm %579, %584 : !torch.tensor, !torch.tensor -> !torch.tensor
%586 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%587 = torch.operator "aten._unsafe_view"(%585, %586) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%588 = torch.aten.div.Tensor %587, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%589 = torch.aten.add.Tensor %588, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%590 = torch.aten._softmax %589, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%591 = torch.aten.empty_like %590, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%592 = torch.operator "aten.bernoulli_.float"(%591, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%593 = torch.aten.div_.Scalar %592, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%594 = torch.aten.mul.Tensor %590, %593 : !torch.tensor, !torch.tensor -> !torch.tensor
%595 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%596 = torch.aten.expand %594, %595, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%597 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%598 = torch.aten.view %596, %597 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%599 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%600 = torch.aten.expand %569, %599, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%601 = torch.operator "aten.clone"(%600, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%602 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%603 = torch.operator "aten._unsafe_view"(%601, %602) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%604 = torch.aten.bmm %598, %603 : !torch.tensor, !torch.tensor -> !torch.tensor
%605 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%606 = torch.operator "aten._unsafe_view"(%604, %605) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%607 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%608 = torch.aten.permute %606, %607 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%609 = torch.operator "aten.clone"(%608, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%610 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%611 = torch.aten.view %609, %610 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%612 = torch.aten.t %arg120 : !torch.tensor -> !torch.tensor
%613 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%614 = torch.aten.view %611, %613 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%615 = torch.aten.mm %614, %612 : !torch.tensor, !torch.tensor -> !torch.tensor
%616 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%617 = torch.operator "aten._unsafe_view"(%615, %616) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%618 = torch.aten.add_.Tensor %617, %arg119, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%619 = torch.aten.empty_like %618, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%620 = torch.operator "aten.bernoulli_.float"(%619, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%621 = torch.aten.div_.Scalar %620, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%622 = torch.aten.mul.Tensor %618, %621 : !torch.tensor, !torch.tensor -> !torch.tensor
%623 = torch.aten.add.Tensor %622, %result0_27, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%624 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_30, %result1_31, %result2_32 = torch.aten.native_layer_norm %623, %624, %arg118, %arg117, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%625 = torch.aten.t %arg128 : !torch.tensor -> !torch.tensor
%626 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%627 = torch.aten.view %result0_30, %626 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%628 = torch.aten.mm %627, %625 : !torch.tensor, !torch.tensor -> !torch.tensor
%629 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%630 = torch.operator "aten._unsafe_view"(%628, %629) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%631 = torch.aten.add_.Tensor %630, %arg127, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%632 = torch.aten.gelu %631 : !torch.tensor -> !torch.tensor
%633 = torch.aten.t %arg132 : !torch.tensor -> !torch.tensor
%634 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%635 = torch.aten.view %632, %634 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%636 = torch.aten.mm %635, %633 : !torch.tensor, !torch.tensor -> !torch.tensor
%637 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%638 = torch.operator "aten._unsafe_view"(%636, %637) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%639 = torch.aten.add_.Tensor %638, %arg131, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%640 = torch.aten.empty_like %639, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%641 = torch.operator "aten.bernoulli_.float"(%640, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%642 = torch.aten.div_.Scalar %641, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%643 = torch.aten.mul.Tensor %639, %642 : !torch.tensor, !torch.tensor -> !torch.tensor
%644 = torch.aten.add.Tensor %643, %result0_30, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%645 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_33, %result1_34, %result2_35 = torch.aten.native_layer_norm %644, %645, %arg130, %arg129, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%646 = torch.aten.t %arg140 : !torch.tensor -> !torch.tensor
%647 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%648 = torch.aten.view %result0_33, %647 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%649 = torch.aten.mm %648, %646 : !torch.tensor, !torch.tensor -> !torch.tensor
%650 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%651 = torch.operator "aten._unsafe_view"(%649, %650) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%652 = torch.aten.add_.Tensor %651, %arg139, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%653 = torch.aten.t %arg138 : !torch.tensor -> !torch.tensor
%654 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%655 = torch.aten.view %result0_33, %654 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%656 = torch.aten.mm %655, %653 : !torch.tensor, !torch.tensor -> !torch.tensor
%657 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%658 = torch.operator "aten._unsafe_view"(%656, %657) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%659 = torch.aten.add_.Tensor %658, %arg137, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%660 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%661 = torch.aten.view %659, %660 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%662 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%663 = torch.aten.permute %661, %662 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%664 = torch.aten.t %arg142 : !torch.tensor -> !torch.tensor
%665 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%666 = torch.aten.view %result0_33, %665 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%667 = torch.aten.mm %666, %664 : !torch.tensor, !torch.tensor -> !torch.tensor
%668 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%669 = torch.operator "aten._unsafe_view"(%667, %668) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%670 = torch.aten.add_.Tensor %669, %arg141, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%671 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%672 = torch.aten.view %670, %671 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%673 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%674 = torch.aten.permute %672, %673 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%675 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%676 = torch.aten.view %652, %675 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%677 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%678 = torch.aten.permute %676, %677 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%679 = torch.aten.transpose.int %663, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%680 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%681 = torch.aten.expand %678, %680, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%682 = torch.operator "aten.clone"(%681, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%683 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%684 = torch.operator "aten._unsafe_view"(%682, %683) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%685 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%686 = torch.aten.expand %679, %685, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%687 = torch.operator "aten.clone"(%686, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%688 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%689 = torch.operator "aten._unsafe_view"(%687, %688) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%690 = torch.aten.bmm %684, %689 : !torch.tensor, !torch.tensor -> !torch.tensor
%691 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%692 = torch.operator "aten._unsafe_view"(%690, %691) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%693 = torch.aten.div.Tensor %692, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
// Adds %0 (typed !torch.tensor<[4,1,1,512],f32>) to %693 and applies _softmax along dim %int-1.
// NOTE(review): %0 looks like an additive attention mask and %695 like attention
// probabilities -- confirm against the originating model.
%694 = torch.aten.add.Tensor %693, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%695 = torch.aten._softmax %694, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask: an empty_like buffer is filled in place by bernoulli_ with %float9.000000e-01,
// rescaled in place by div_ with the same constant, then multiplied into %695.
// NOTE(review): this matches a decomposed (inverted) dropout -- confirm.
%696 = torch.aten.empty_like %695, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%697 = torch.operator "aten.bernoulli_.float"(%696, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%698 = torch.aten.div_.Scalar %697, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%699 = torch.aten.mul.Tensor %695, %698 : !torch.tensor, !torch.tensor -> !torch.tensor
// Left bmm operand: expand %699 to [%int4, %int12, %int512, %int512], then view as [%int48, %int512, %int512].
%700 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%701 = torch.aten.expand %699, %700, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%702 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%703 = torch.aten.view %701, %702 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right bmm operand: expand %674 to [%int4, %int12, %int512, %int64], clone, then _unsafe_view as [%int48, %int512, %int64].
%704 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%705 = torch.aten.expand %674, %704, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%706 = torch.operator "aten.clone"(%705, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%707 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%708 = torch.operator "aten._unsafe_view"(%706, %707) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, then reshape the product back to [%int4, %int12, %int512, %int64].
%709 = torch.aten.bmm %703, %708 : !torch.tensor, !torch.tensor -> !torch.tensor
%710 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%711 = torch.operator "aten._unsafe_view"(%709, %710) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Permute with order [%int0, %int2, %int1, %int3], clone, and view as [%int4, %int512, %int768].
%712 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%713 = torch.aten.permute %711, %712 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%714 = torch.operator "aten.clone"(%713, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%715 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%716 = torch.aten.view %714, %715 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Linear step on %716: view as [%int2048, %int768], mm with the transpose of %arg136,
// _unsafe_view back to [%int4, %int512, %int768], then in-place add of %arg135.
%717 = torch.aten.t %arg136 : !torch.tensor -> !torch.tensor
%718 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%719 = torch.aten.view %716, %718 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%720 = torch.aten.mm %719, %717 : !torch.tensor, !torch.tensor -> !torch.tensor
%721 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%722 = torch.operator "aten._unsafe_view"(%720, %721) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%723 = torch.aten.add_.Tensor %722, %arg135, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask (empty_like -> bernoulli_ -> div_, both using %float9.000000e-01) multiplied into %723.
// NOTE(review): looks like decomposed dropout -- confirm.
%724 = torch.aten.empty_like %723, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%725 = torch.operator "aten.bernoulli_.float"(%724, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%726 = torch.aten.div_.Scalar %725, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%727 = torch.aten.mul.Tensor %723, %726 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add %result0_33 (presumably the residual input of this sub-layer -- confirm), then
// native_layer_norm over [%int768] with %arg134, %arg133 and %float9.999990e-13.
%728 = torch.aten.add.Tensor %727, %result0_33, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%729 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_36, %result1_37, %result2_38 = torch.aten.native_layer_norm %728, %729, %arg134, %arg133, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Two linear steps with a gelu in between, applied to %result0_36.
// NOTE(review): this has the shape of a transformer feed-forward sub-layer -- confirm.
// First linear: view as [%int2048, %int768], mm with transpose of %arg144,
// _unsafe_view to [%int4, %int512, %int3072], in-place add of %arg143, then gelu.
%730 = torch.aten.t %arg144 : !torch.tensor -> !torch.tensor
%731 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%732 = torch.aten.view %result0_36, %731 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%733 = torch.aten.mm %732, %730 : !torch.tensor, !torch.tensor -> !torch.tensor
%734 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%735 = torch.operator "aten._unsafe_view"(%733, %734) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%736 = torch.aten.add_.Tensor %735, %arg143, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%737 = torch.aten.gelu %736 : !torch.tensor -> !torch.tensor
// Second linear: view as [%int2048, %int3072], mm with transpose of %arg148,
// _unsafe_view to [%int4, %int512, %int768], in-place add of %arg147.
%738 = torch.aten.t %arg148 : !torch.tensor -> !torch.tensor
%739 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%740 = torch.aten.view %737, %739 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%741 = torch.aten.mm %740, %738 : !torch.tensor, !torch.tensor -> !torch.tensor
%742 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%743 = torch.operator "aten._unsafe_view"(%741, %742) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%744 = torch.aten.add_.Tensor %743, %arg147, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask (empty_like -> bernoulli_ -> div_, both using %float9.000000e-01) multiplied into %744.
%745 = torch.aten.empty_like %744, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%746 = torch.operator "aten.bernoulli_.float"(%745, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%747 = torch.aten.div_.Scalar %746, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%748 = torch.aten.mul.Tensor %744, %747 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add the sub-layer input %result0_36 back, then native_layer_norm over [%int768]
// with %arg146, %arg145 and %float9.999990e-13.
%749 = torch.aten.add.Tensor %748, %result0_36, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%750 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_39, %result1_40, %result2_41 = torch.aten.native_layer_norm %749, %750, %arg146, %arg145, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three linear projections of %result0_39, each: view as [%int2048, %int768], mm with a
// transposed weight, _unsafe_view to [%int4, %int512, %int768], in-place bias add.
// NOTE(review): judging by how the results feed the later bmm ops, %757 is presumably the
// query, %764 the key and %775 the value projection -- confirm against the model.
// Projection 1: %arg156 / %arg155 -> %757.
%751 = torch.aten.t %arg156 : !torch.tensor -> !torch.tensor
%752 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%753 = torch.aten.view %result0_39, %752 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%754 = torch.aten.mm %753, %751 : !torch.tensor, !torch.tensor -> !torch.tensor
%755 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%756 = torch.operator "aten._unsafe_view"(%754, %755) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%757 = torch.aten.add_.Tensor %756, %arg155, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection 2: %arg154 / %arg153 -> %764, then view as [%int4, %int512, %int12, %int64]
// and permute with order [%int0, %int2, %int1, %int3] -> %768.
%758 = torch.aten.t %arg154 : !torch.tensor -> !torch.tensor
%759 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%760 = torch.aten.view %result0_39, %759 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%761 = torch.aten.mm %760, %758 : !torch.tensor, !torch.tensor -> !torch.tensor
%762 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%763 = torch.operator "aten._unsafe_view"(%761, %762) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%764 = torch.aten.add_.Tensor %763, %arg153, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%765 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%766 = torch.aten.view %764, %765 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%767 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%768 = torch.aten.permute %766, %767 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection 3: %arg158 / %arg157 -> %775, then the same view + permute -> %779.
%769 = torch.aten.t %arg158 : !torch.tensor -> !torch.tensor
%770 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%771 = torch.aten.view %result0_39, %770 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%772 = torch.aten.mm %771, %769 : !torch.tensor, !torch.tensor -> !torch.tensor
%773 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%774 = torch.operator "aten._unsafe_view"(%772, %773) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%775 = torch.aten.add_.Tensor %774, %arg157, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%776 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%777 = torch.aten.view %775, %776 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%778 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%779 = torch.aten.permute %777, %778 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Same view + permute applied to projection 1 (%757) -> %783.
%780 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%781 = torch.aten.view %757, %780 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%782 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%783 = torch.aten.permute %781, %782 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Batched product of %783 with the transposed %768, followed by div, add and softmax.
// NOTE(review): this looks like scaled dot-product attention scores -- confirm.
// %768 has its last two dims swapped (transpose.int with %int-1, %int-2).
%784 = torch.aten.transpose.int %768, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left operand: expand %783 to [%int4, %int12, %int512, %int64], clone, _unsafe_view as [%int48, %int512, %int64].
%785 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%786 = torch.aten.expand %783, %785, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%787 = torch.operator "aten.clone"(%786, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%788 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%789 = torch.operator "aten._unsafe_view"(%787, %788) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right operand: expand %784 to [%int4, %int12, %int64, %int512], clone, _unsafe_view as [%int48, %int64, %int512].
%790 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%791 = torch.aten.expand %784, %790, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%792 = torch.operator "aten.clone"(%791, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%793 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%794 = torch.operator "aten._unsafe_view"(%792, %793) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// bmm, then _unsafe_view of the product as [%int4, %int12, %int512, %int512].
%795 = torch.aten.bmm %789, %794 : !torch.tensor, !torch.tensor -> !torch.tensor
%796 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%797 = torch.operator "aten._unsafe_view"(%795, %796) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by %1 (a rank-0 f64 tensor), add %0 (typed [4,1,1,512] f32), softmax along dim %int-1.
%798 = torch.aten.div.Tensor %797, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%799 = torch.aten.add.Tensor %798, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%800 = torch.aten._softmax %799, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask on the softmax output %800, batched matmul with %779, and reshape.
// Mask: empty_like -> in-place bernoulli_ -> in-place div_ (both using %float9.000000e-01),
// multiplied into %800. NOTE(review): looks like decomposed dropout -- confirm.
%801 = torch.aten.empty_like %800, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%802 = torch.operator "aten.bernoulli_.float"(%801, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%803 = torch.aten.div_.Scalar %802, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%804 = torch.aten.mul.Tensor %800, %803 : !torch.tensor, !torch.tensor -> !torch.tensor
// Left bmm operand: expand %804 to [%int4, %int12, %int512, %int512], view as [%int48, %int512, %int512].
%805 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%806 = torch.aten.expand %804, %805, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%807 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%808 = torch.aten.view %806, %807 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right bmm operand: expand %779 to [%int4, %int12, %int512, %int64], clone, _unsafe_view as [%int48, %int512, %int64].
%809 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%810 = torch.aten.expand %779, %809, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%811 = torch.operator "aten.clone"(%810, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%812 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%813 = torch.operator "aten._unsafe_view"(%811, %812) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// bmm, _unsafe_view as [%int4, %int12, %int512, %int64], permute with order
// [%int0, %int2, %int1, %int3], clone, and view as [%int4, %int512, %int768].
%814 = torch.aten.bmm %808, %813 : !torch.tensor, !torch.tensor -> !torch.tensor
%815 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%816 = torch.operator "aten._unsafe_view"(%814, %815) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%817 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%818 = torch.aten.permute %816, %817 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%819 = torch.operator "aten.clone"(%818, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%820 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%821 = torch.aten.view %819, %820 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Linear step on %821: view as [%int2048, %int768], mm with the transpose of %arg152,
// _unsafe_view back to [%int4, %int512, %int768], then in-place add of %arg151.
%822 = torch.aten.t %arg152 : !torch.tensor -> !torch.tensor
%823 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%824 = torch.aten.view %821, %823 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%825 = torch.aten.mm %824, %822 : !torch.tensor, !torch.tensor -> !torch.tensor
%826 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%827 = torch.operator "aten._unsafe_view"(%825, %826) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%828 = torch.aten.add_.Tensor %827, %arg151, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask (empty_like -> bernoulli_ -> div_, both using %float9.000000e-01) multiplied into %828.
// NOTE(review): looks like decomposed dropout -- confirm.
%829 = torch.aten.empty_like %828, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%830 = torch.operator "aten.bernoulli_.float"(%829, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%831 = torch.aten.div_.Scalar %830, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%832 = torch.aten.mul.Tensor %828, %831 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add %result0_39 (the tensor the three projections were computed from), then
// native_layer_norm over [%int768] with %arg150, %arg149 and %float9.999990e-13.
%833 = torch.aten.add.Tensor %832, %result0_39, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%834 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_42, %result1_43, %result2_44 = torch.aten.native_layer_norm %833, %834, %arg150, %arg149, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Two linear steps with a gelu in between, applied to %result0_42.
// NOTE(review): this has the shape of a transformer feed-forward sub-layer -- confirm.
// First linear: view as [%int2048, %int768], mm with transpose of %arg160,
// _unsafe_view to [%int4, %int512, %int3072], in-place add of %arg159, then gelu.
%835 = torch.aten.t %arg160 : !torch.tensor -> !torch.tensor
%836 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%837 = torch.aten.view %result0_42, %836 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%838 = torch.aten.mm %837, %835 : !torch.tensor, !torch.tensor -> !torch.tensor
%839 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%840 = torch.operator "aten._unsafe_view"(%838, %839) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%841 = torch.aten.add_.Tensor %840, %arg159, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%842 = torch.aten.gelu %841 : !torch.tensor -> !torch.tensor
// Second linear: view as [%int2048, %int3072], mm with transpose of %arg164,
// _unsafe_view to [%int4, %int512, %int768], in-place add of %arg163.
%843 = torch.aten.t %arg164 : !torch.tensor -> !torch.tensor
%844 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%845 = torch.aten.view %842, %844 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%846 = torch.aten.mm %845, %843 : !torch.tensor, !torch.tensor -> !torch.tensor
%847 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%848 = torch.operator "aten._unsafe_view"(%846, %847) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%849 = torch.aten.add_.Tensor %848, %arg163, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask (empty_like -> bernoulli_ -> div_, both using %float9.000000e-01) multiplied into %849.
%850 = torch.aten.empty_like %849, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%851 = torch.operator "aten.bernoulli_.float"(%850, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%852 = torch.aten.div_.Scalar %851, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%853 = torch.aten.mul.Tensor %849, %852 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add the sub-layer input %result0_42 back, then native_layer_norm over [%int768]
// with %arg162, %arg161 and %float9.999990e-13.
%854 = torch.aten.add.Tensor %853, %result0_42, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%855 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_45, %result1_46, %result2_47 = torch.aten.native_layer_norm %854, %855, %arg162, %arg161, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three linear projections of %result0_45, each: view as [%int2048, %int768], mm with a
// transposed weight, _unsafe_view to [%int4, %int512, %int768], in-place bias add.
// NOTE(review): judging by how the results feed the later bmm ops, %862 is presumably the
// query, %869 the key and %880 the value projection -- confirm against the model.
// Projection 1: %arg172 / %arg171 -> %862.
%856 = torch.aten.t %arg172 : !torch.tensor -> !torch.tensor
%857 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%858 = torch.aten.view %result0_45, %857 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%859 = torch.aten.mm %858, %856 : !torch.tensor, !torch.tensor -> !torch.tensor
%860 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%861 = torch.operator "aten._unsafe_view"(%859, %860) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%862 = torch.aten.add_.Tensor %861, %arg171, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection 2: %arg170 / %arg169 -> %869, then view as [%int4, %int512, %int12, %int64]
// and permute with order [%int0, %int2, %int1, %int3] -> %873.
%863 = torch.aten.t %arg170 : !torch.tensor -> !torch.tensor
%864 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%865 = torch.aten.view %result0_45, %864 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%866 = torch.aten.mm %865, %863 : !torch.tensor, !torch.tensor -> !torch.tensor
%867 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%868 = torch.operator "aten._unsafe_view"(%866, %867) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%869 = torch.aten.add_.Tensor %868, %arg169, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%870 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%871 = torch.aten.view %869, %870 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%872 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%873 = torch.aten.permute %871, %872 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection 3: %arg174 / %arg173 -> %880, then the same view + permute -> %884.
%874 = torch.aten.t %arg174 : !torch.tensor -> !torch.tensor
%875 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%876 = torch.aten.view %result0_45, %875 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%877 = torch.aten.mm %876, %874 : !torch.tensor, !torch.tensor -> !torch.tensor
%878 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%879 = torch.operator "aten._unsafe_view"(%877, %878) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%880 = torch.aten.add_.Tensor %879, %arg173, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%881 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%882 = torch.aten.view %880, %881 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%883 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%884 = torch.aten.permute %882, %883 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Same view + permute applied to projection 1 (%862) -> %888.
%885 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%886 = torch.aten.view %862, %885 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%887 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%888 = torch.aten.permute %886, %887 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Batched product of %888 with the transposed %873, followed by div, add and softmax.
// NOTE(review): this looks like scaled dot-product attention scores -- confirm.
// %873 has its last two dims swapped (transpose.int with %int-1, %int-2).
%889 = torch.aten.transpose.int %873, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left operand: expand %888 to [%int4, %int12, %int512, %int64], clone, _unsafe_view as [%int48, %int512, %int64].
%890 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%891 = torch.aten.expand %888, %890, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%892 = torch.operator "aten.clone"(%891, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%893 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%894 = torch.operator "aten._unsafe_view"(%892, %893) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right operand: expand %889 to [%int4, %int12, %int64, %int512], clone, _unsafe_view as [%int48, %int64, %int512].
%895 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%896 = torch.aten.expand %889, %895, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%897 = torch.operator "aten.clone"(%896, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%898 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%899 = torch.operator "aten._unsafe_view"(%897, %898) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// bmm, then _unsafe_view of the product as [%int4, %int12, %int512, %int512].
%900 = torch.aten.bmm %894, %899 : !torch.tensor, !torch.tensor -> !torch.tensor
%901 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%902 = torch.operator "aten._unsafe_view"(%900, %901) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by %1 (a rank-0 f64 tensor), add %0 (typed [4,1,1,512] f32), softmax along dim %int-1.
%903 = torch.aten.div.Tensor %902, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%904 = torch.aten.add.Tensor %903, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%905 = torch.aten._softmax %904, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask on the softmax output %905, batched matmul with %884, and reshape.
// Mask: empty_like -> in-place bernoulli_ -> in-place div_ (both using %float9.000000e-01),
// multiplied into %905. NOTE(review): looks like decomposed dropout -- confirm.
%906 = torch.aten.empty_like %905, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%907 = torch.operator "aten.bernoulli_.float"(%906, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%908 = torch.aten.div_.Scalar %907, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%909 = torch.aten.mul.Tensor %905, %908 : !torch.tensor, !torch.tensor -> !torch.tensor
// Left bmm operand: expand %909 to [%int4, %int12, %int512, %int512], view as [%int48, %int512, %int512].
%910 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%911 = torch.aten.expand %909, %910, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%912 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%913 = torch.aten.view %911, %912 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right bmm operand: expand %884 to [%int4, %int12, %int512, %int64], clone, _unsafe_view as [%int48, %int512, %int64].
%914 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%915 = torch.aten.expand %884, %914, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%916 = torch.operator "aten.clone"(%915, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%917 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%918 = torch.operator "aten._unsafe_view"(%916, %917) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// bmm, _unsafe_view as [%int4, %int12, %int512, %int64], permute with order
// [%int0, %int2, %int1, %int3], clone, and view as [%int4, %int512, %int768].
%919 = torch.aten.bmm %913, %918 : !torch.tensor, !torch.tensor -> !torch.tensor
%920 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%921 = torch.operator "aten._unsafe_view"(%919, %920) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%922 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%923 = torch.aten.permute %921, %922 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%924 = torch.operator "aten.clone"(%923, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%925 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%926 = torch.aten.view %924, %925 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Linear step on %926: view as [%int2048, %int768], mm with the transpose of %arg168,
// _unsafe_view back to [%int4, %int512, %int768], then in-place add of %arg167.
%927 = torch.aten.t %arg168 : !torch.tensor -> !torch.tensor
%928 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%929 = torch.aten.view %926, %928 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%930 = torch.aten.mm %929, %927 : !torch.tensor, !torch.tensor -> !torch.tensor
%931 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%932 = torch.operator "aten._unsafe_view"(%930, %931) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%933 = torch.aten.add_.Tensor %932, %arg167, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask (empty_like -> bernoulli_ -> div_, both using %float9.000000e-01) multiplied into %933.
// NOTE(review): looks like decomposed dropout -- confirm.
%934 = torch.aten.empty_like %933, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%935 = torch.operator "aten.bernoulli_.float"(%934, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%936 = torch.aten.div_.Scalar %935, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%937 = torch.aten.mul.Tensor %933, %936 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add %result0_45 (the tensor the three projections were computed from), then
// native_layer_norm over [%int768] with %arg166, %arg165 and %float9.999990e-13.
%938 = torch.aten.add.Tensor %937, %result0_45, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%939 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %938, %939, %arg166, %arg165, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Two linear steps with a gelu in between, applied to %result0_48.
// NOTE(review): this has the shape of a transformer feed-forward sub-layer -- confirm.
// First linear: view as [%int2048, %int768], mm with transpose of %arg176,
// _unsafe_view to [%int4, %int512, %int3072], in-place add of %arg175, then gelu.
%940 = torch.aten.t %arg176 : !torch.tensor -> !torch.tensor
%941 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%942 = torch.aten.view %result0_48, %941 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%943 = torch.aten.mm %942, %940 : !torch.tensor, !torch.tensor -> !torch.tensor
%944 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%945 = torch.operator "aten._unsafe_view"(%943, %944) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%946 = torch.aten.add_.Tensor %945, %arg175, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%947 = torch.aten.gelu %946 : !torch.tensor -> !torch.tensor
// Second linear: view as [%int2048, %int3072], mm with transpose of %arg180,
// _unsafe_view to [%int4, %int512, %int768], in-place add of %arg179.
%948 = torch.aten.t %arg180 : !torch.tensor -> !torch.tensor
%949 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%950 = torch.aten.view %947, %949 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%951 = torch.aten.mm %950, %948 : !torch.tensor, !torch.tensor -> !torch.tensor
%952 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%953 = torch.operator "aten._unsafe_view"(%951, %952) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%954 = torch.aten.add_.Tensor %953, %arg179, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask (empty_like -> bernoulli_ -> div_, both using %float9.000000e-01) multiplied into %954.
%955 = torch.aten.empty_like %954, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%956 = torch.operator "aten.bernoulli_.float"(%955, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%957 = torch.aten.div_.Scalar %956, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%958 = torch.aten.mul.Tensor %954, %957 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add the sub-layer input %result0_48 back, then native_layer_norm over [%int768]
// with %arg178, %arg177 and %float9.999990e-13.
%959 = torch.aten.add.Tensor %958, %result0_48, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%960 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %959, %960, %arg178, %arg177, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three linear projections of %result0_51, each: view as [%int2048, %int768], mm with a
// transposed weight, _unsafe_view to [%int4, %int512, %int768], in-place bias add.
// NOTE(review): judging by how the results feed the later bmm, %967 is presumably the
// query, %974 the key and %985 the value projection -- confirm against the model.
// Projection 1: %arg188 / %arg187 -> %967.
%961 = torch.aten.t %arg188 : !torch.tensor -> !torch.tensor
%962 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%963 = torch.aten.view %result0_51, %962 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%964 = torch.aten.mm %963, %961 : !torch.tensor, !torch.tensor -> !torch.tensor
%965 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%966 = torch.operator "aten._unsafe_view"(%964, %965) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%967 = torch.aten.add_.Tensor %966, %arg187, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection 2: %arg186 / %arg185 -> %974, then view as [%int4, %int512, %int12, %int64]
// and permute with order [%int0, %int2, %int1, %int3] -> %978.
%968 = torch.aten.t %arg186 : !torch.tensor -> !torch.tensor
%969 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%970 = torch.aten.view %result0_51, %969 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%971 = torch.aten.mm %970, %968 : !torch.tensor, !torch.tensor -> !torch.tensor
%972 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%973 = torch.operator "aten._unsafe_view"(%971, %972) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%974 = torch.aten.add_.Tensor %973, %arg185, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%975 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%976 = torch.aten.view %974, %975 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%977 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%978 = torch.aten.permute %976, %977 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection 3: %arg190 / %arg189 -> %985, then the same view + permute -> %989.
%979 = torch.aten.t %arg190 : !torch.tensor -> !torch.tensor
%980 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%981 = torch.aten.view %result0_51, %980 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%982 = torch.aten.mm %981, %979 : !torch.tensor, !torch.tensor -> !torch.tensor
%983 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%984 = torch.operator "aten._unsafe_view"(%982, %983) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%985 = torch.aten.add_.Tensor %984, %arg189, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%986 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%987 = torch.aten.view %985, %986 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%988 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%989 = torch.aten.permute %987, %988 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Same view + permute applied to projection 1 (%967) -> %993.
%990 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%991 = torch.aten.view %967, %990 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%992 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%993 = torch.aten.permute %991, %992 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Batched product of %993 with the transposed %978, followed by div, add and softmax.
// NOTE(review): this looks like scaled dot-product attention scores -- confirm.
// %978 has its last two dims swapped (transpose.int with %int-1, %int-2).
%994 = torch.aten.transpose.int %978, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left operand: expand %993 to [%int4, %int12, %int512, %int64], clone, _unsafe_view as [%int48, %int512, %int64].
%995 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%996 = torch.aten.expand %993, %995, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%997 = torch.operator "aten.clone"(%996, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%998 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%999 = torch.operator "aten._unsafe_view"(%997, %998) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right operand: expand %994 to [%int4, %int12, %int64, %int512], clone, _unsafe_view as [%int48, %int64, %int512].
%1000 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1001 = torch.aten.expand %994, %1000, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1002 = torch.operator "aten.clone"(%1001, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1003 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1004 = torch.operator "aten._unsafe_view"(%1002, %1003) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// bmm, then _unsafe_view of the product as [%int4, %int12, %int512, %int512].
%1005 = torch.aten.bmm %999, %1004 : !torch.tensor, !torch.tensor -> !torch.tensor
%1006 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1007 = torch.operator "aten._unsafe_view"(%1005, %1006) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by %1 (a rank-0 f64 tensor), add %0 (typed [4,1,1,512] f32), softmax along dim %int-1.
%1008 = torch.aten.div.Tensor %1007, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1009 = torch.aten.add.Tensor %1008, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1010 = torch.aten._softmax %1009, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1011 = torch.aten.empty_like %1010, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1012 = torch.operator "aten.bernoulli_.float"(%1011, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1013 = torch.aten.div_.Scalar %1012, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1014 = torch.aten.mul.Tensor %1010, %1013 : !torch.tensor, !torch.tensor -> !torch.tensor
// Context computation: bmm of the masked attention probabilities %1014 with the head-split tensor %989, then merge heads.
// NOTE(review): shape remarks assume each %intN constant holds the value in its name.
// Left operand: expand %1014 and collapse the two leading dims (presumably to [48, 512, 512]).
%1015 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1016 = torch.aten.expand %1014, %1015, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1017 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1018 = torch.aten.view %1016, %1017 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right operand: expand, clone, and collapse %989 (presumably to [48, 512, 64]).
%1019 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1020 = torch.aten.expand %989, %1019, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1021 = torch.operator "aten.clone"(%1020, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1022 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1023 = torch.operator "aten._unsafe_view"(%1021, %1022) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1024 = torch.aten.bmm %1018, %1023 : !torch.tensor, !torch.tensor -> !torch.tensor
// Back to four dims, undo the head permutation with (0, 2, 1, 3), clone, and merge the last two dims into 768.
%1025 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1026 = torch.operator "aten._unsafe_view"(%1024, %1025) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1027 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1028 = torch.aten.permute %1026, %1027 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1029 = torch.operator "aten.clone"(%1028, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1030 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1031 = torch.aten.view %1029, %1030 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Attention output stage: linear projection of %1031 (transposed %arg184, plus %arg183), dropout-style masking,
// residual add with %result0_51, and layer norm producing %result0_54.
// NOTE(review): shape remarks assume each %intN / %floatN constant holds the value in its name.
%1032 = torch.aten.t %arg184 : !torch.tensor -> !torch.tensor
%1033 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1034 = torch.aten.view %1031, %1033 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1035 = torch.aten.mm %1034, %1032 : !torch.tensor, !torch.tensor -> !torch.tensor
%1036 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1037 = torch.operator "aten._unsafe_view"(%1035, %1036) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1038 = torch.aten.add_.Tensor %1037, %arg183, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Bernoulli mask, rescaled by the same constant and multiplied into the projection output.
%1039 = torch.aten.empty_like %1038, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1040 = torch.operator "aten.bernoulli_.float"(%1039, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1041 = torch.aten.div_.Scalar %1040, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1042 = torch.aten.mul.Tensor %1038, %1041 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual connection: add the block input %result0_51.
%1043 = torch.aten.add.Tensor %1042, %result0_51, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Layer norm over the last dimension (normalized shape [%int768]); %arg182 and %arg181 sit in the weight and bias operand slots.
%1044 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_54, %result1_55, %result2_56 = torch.aten.native_layer_norm %1043, %1044, %arg182, %arg181, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward stage on %result0_54: linear (%arg192 / %arg191) -> gelu -> linear (%arg196 / %arg195)
// -> dropout-style masking -> residual add -> layer norm, producing %result0_57.
// NOTE(review): widths 768 -> 3072 -> 768 assume each %intN constant holds the value in its name.
// First linear: flatten to 2-D, mm with the transposed weight, reshape to [.., 3072], add %arg191.
%1045 = torch.aten.t %arg192 : !torch.tensor -> !torch.tensor
%1046 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1047 = torch.aten.view %result0_54, %1046 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1048 = torch.aten.mm %1047, %1045 : !torch.tensor, !torch.tensor -> !torch.tensor
%1049 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1050 = torch.operator "aten._unsafe_view"(%1048, %1049) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1051 = torch.aten.add_.Tensor %1050, %arg191, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1052 = torch.aten.gelu %1051 : !torch.tensor -> !torch.tensor
// Second linear: flatten to [2048, 3072], mm with the transposed %arg196, reshape, add %arg195.
%1053 = torch.aten.t %arg196 : !torch.tensor -> !torch.tensor
%1054 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1055 = torch.aten.view %1052, %1054 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1056 = torch.aten.mm %1055, %1053 : !torch.tensor, !torch.tensor -> !torch.tensor
%1057 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1058 = torch.operator "aten._unsafe_view"(%1056, %1057) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1059 = torch.aten.add_.Tensor %1058, %arg195, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Bernoulli mask, rescaled by the same constant and multiplied into the output.
%1060 = torch.aten.empty_like %1059, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1061 = torch.operator "aten.bernoulli_.float"(%1060, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1062 = torch.aten.div_.Scalar %1061, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1063 = torch.aten.mul.Tensor %1059, %1062 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with the stage input %result0_54, then layer norm (%arg194 / %arg193 in the weight / bias slots).
%1064 = torch.aten.add.Tensor %1063, %result0_54, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1065 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_57, %result1_58, %result2_59 = torch.aten.native_layer_norm %1064, %1065, %arg194, %arg193, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three linear projections of %result0_57, each followed (where shown) by a head split into (4, 512, 12, 64) permuted to (0, 2, 1, 3).
// Roles follow from how the results are consumed later in the function:
//   %1072 -> %1098 : left operand of the score bmm (query role)
//   %1079 -> %1083 : transposed right operand of the score bmm (key role)
//   %1090 -> %1094 : right operand of the probs x V bmm (value role)
// NOTE(review): shape remarks assume each %intN constant holds the value in its name.
// Projection with %arg44 / %arg43 (query role).
%1066 = torch.aten.t %arg44 : !torch.tensor -> !torch.tensor
%1067 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1068 = torch.aten.view %result0_57, %1067 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1069 = torch.aten.mm %1068, %1066 : !torch.tensor, !torch.tensor -> !torch.tensor
%1070 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1071 = torch.operator "aten._unsafe_view"(%1069, %1070) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1072 = torch.aten.add_.Tensor %1071, %arg43, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection with %arg42 / %arg41 (key role), then head split.
%1073 = torch.aten.t %arg42 : !torch.tensor -> !torch.tensor
%1074 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1075 = torch.aten.view %result0_57, %1074 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1076 = torch.aten.mm %1075, %1073 : !torch.tensor, !torch.tensor -> !torch.tensor
%1077 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1078 = torch.operator "aten._unsafe_view"(%1076, %1077) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1079 = torch.aten.add_.Tensor %1078, %arg41, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1080 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1081 = torch.aten.view %1079, %1080 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1082 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1083 = torch.aten.permute %1081, %1082 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection with %arg46 / %arg45 (value role), then head split.
%1084 = torch.aten.t %arg46 : !torch.tensor -> !torch.tensor
%1085 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1086 = torch.aten.view %result0_57, %1085 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1087 = torch.aten.mm %1086, %1084 : !torch.tensor, !torch.tensor -> !torch.tensor
%1088 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1089 = torch.operator "aten._unsafe_view"(%1087, %1088) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1090 = torch.aten.add_.Tensor %1089, %arg45, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1091 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1092 = torch.aten.view %1090, %1091 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1093 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1094 = torch.aten.permute %1092, %1093 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Head split of the query-role projection %1072.
%1095 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1096 = torch.aten.view %1072, %1095 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1097 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1098 = torch.aten.permute %1096, %1097 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Attention scores: bmm of %1098 with the transposed %1083, divided by %1, offset by %0, softmaxed, then randomly masked.
// NOTE(review): shape remarks assume each %intN / %floatN constant holds the value in its name.
// Swap the last two dims of %1083 so it can be the right-hand bmm operand.
%1099 = torch.aten.transpose.int %1083, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left operand: expand, clone, collapse the two leading dims (presumably to [48, 512, 64]).
%1100 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1101 = torch.aten.expand %1098, %1100, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1102 = torch.operator "aten.clone"(%1101, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1103 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1104 = torch.operator "aten._unsafe_view"(%1102, %1103) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right operand: same sequence on the transposed tensor (presumably to [48, 64, 512]).
%1105 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1106 = torch.aten.expand %1099, %1105, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1107 = torch.operator "aten.clone"(%1106, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1108 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1109 = torch.operator "aten._unsafe_view"(%1107, %1108) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1110 = torch.aten.bmm %1104, %1109 : !torch.tensor, !torch.tensor -> !torch.tensor
%1111 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1112 = torch.operator "aten._unsafe_view"(%1110, %1111) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by the scalar f64 tensor %1 (presumably the sqrt(head_dim) scale), then add %0 (typed [4,1,1,512] f32; presumably an additive mask).
%1113 = torch.aten.div.Tensor %1112, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1114 = torch.aten.add.Tensor %1113, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
// Softmax over the last dimension.
%1115 = torch.aten._softmax %1114, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Bernoulli mask, rescaled by the same constant and multiplied into the probabilities.
%1116 = torch.aten.empty_like %1115, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1117 = torch.operator "aten.bernoulli_.float"(%1116, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1118 = torch.aten.div_.Scalar %1117, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1119 = torch.aten.mul.Tensor %1115, %1118 : !torch.tensor, !torch.tensor -> !torch.tensor
// Context computation: bmm of the masked attention probabilities %1119 with the head-split tensor %1094, then merge heads.
// NOTE(review): shape remarks assume each %intN constant holds the value in its name.
// Left operand: expand %1119 and collapse the two leading dims (presumably to [48, 512, 512]).
%1120 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1121 = torch.aten.expand %1119, %1120, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1122 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1123 = torch.aten.view %1121, %1122 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right operand: expand, clone, and collapse %1094 (presumably to [48, 512, 64]).
%1124 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1125 = torch.aten.expand %1094, %1124, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1126 = torch.operator "aten.clone"(%1125, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1127 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1128 = torch.operator "aten._unsafe_view"(%1126, %1127) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1129 = torch.aten.bmm %1123, %1128 : !torch.tensor, !torch.tensor -> !torch.tensor
// Back to four dims, undo the head permutation with (0, 2, 1, 3), clone, and merge the last two dims into 768.
%1130 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1131 = torch.operator "aten._unsafe_view"(%1129, %1130) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1132 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1133 = torch.aten.permute %1131, %1132 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1134 = torch.operator "aten.clone"(%1133, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1135 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1136 = torch.aten.view %1134, %1135 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Attention output stage: linear projection of %1136 (transposed %arg40, plus %arg39), dropout-style masking,
// residual add with %result0_57, and layer norm producing %result0_60.
// NOTE(review): shape remarks assume each %intN / %floatN constant holds the value in its name.
%1137 = torch.aten.t %arg40 : !torch.tensor -> !torch.tensor
%1138 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1139 = torch.aten.view %1136, %1138 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1140 = torch.aten.mm %1139, %1137 : !torch.tensor, !torch.tensor -> !torch.tensor
%1141 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1142 = torch.operator "aten._unsafe_view"(%1140, %1141) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1143 = torch.aten.add_.Tensor %1142, %arg39, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Bernoulli mask, rescaled by the same constant and multiplied into the projection output.
%1144 = torch.aten.empty_like %1143, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1145 = torch.operator "aten.bernoulli_.float"(%1144, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1146 = torch.aten.div_.Scalar %1145, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1147 = torch.aten.mul.Tensor %1143, %1146 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual connection: add the layer input %result0_57.
%1148 = torch.aten.add.Tensor %1147, %result0_57, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Layer norm over the last dimension; %arg38 and %arg37 sit in the weight and bias operand slots.
%1149 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_60, %result1_61, %result2_62 = torch.aten.native_layer_norm %1148, %1149, %arg38, %arg37, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward stage on %result0_60: linear (%arg48 / %arg47) -> gelu -> linear (%arg52 / %arg51)
// -> dropout-style masking -> residual add -> layer norm, producing %result0_63.
// NOTE(review): widths 768 -> 3072 -> 768 assume each %intN constant holds the value in its name.
// First linear: flatten to 2-D, mm with the transposed weight, reshape to [.., 3072], add %arg47.
%1150 = torch.aten.t %arg48 : !torch.tensor -> !torch.tensor
%1151 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1152 = torch.aten.view %result0_60, %1151 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1153 = torch.aten.mm %1152, %1150 : !torch.tensor, !torch.tensor -> !torch.tensor
%1154 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1155 = torch.operator "aten._unsafe_view"(%1153, %1154) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1156 = torch.aten.add_.Tensor %1155, %arg47, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1157 = torch.aten.gelu %1156 : !torch.tensor -> !torch.tensor
// Second linear: flatten to [2048, 3072], mm with the transposed %arg52, reshape, add %arg51.
%1158 = torch.aten.t %arg52 : !torch.tensor -> !torch.tensor
%1159 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1160 = torch.aten.view %1157, %1159 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1161 = torch.aten.mm %1160, %1158 : !torch.tensor, !torch.tensor -> !torch.tensor
%1162 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1163 = torch.operator "aten._unsafe_view"(%1161, %1162) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1164 = torch.aten.add_.Tensor %1163, %arg51, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Bernoulli mask, rescaled by the same constant and multiplied into the output.
%1165 = torch.aten.empty_like %1164, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1166 = torch.operator "aten.bernoulli_.float"(%1165, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1167 = torch.aten.div_.Scalar %1166, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1168 = torch.aten.mul.Tensor %1164, %1167 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with the stage input %result0_60, then layer norm (%arg50 / %arg49 in the weight / bias slots).
%1169 = torch.aten.add.Tensor %1168, %result0_60, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1170 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_63, %result1_64, %result2_65 = torch.aten.native_layer_norm %1169, %1170, %arg50, %arg49, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three linear projections of %result0_63, each followed (where shown) by a head split into (4, 512, 12, 64) permuted to (0, 2, 1, 3).
// Roles follow from how the results are consumed later in the function:
//   %1177 -> %1203 : left operand of the score bmm (query role)
//   %1184 -> %1188 : transposed right operand of the score bmm (key role)
//   %1195 -> %1199 : right operand of the probs x V bmm (value role)
// NOTE(review): shape remarks assume each %intN constant holds the value in its name.
// Projection with %arg60 / %arg59 (query role).
%1171 = torch.aten.t %arg60 : !torch.tensor -> !torch.tensor
%1172 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1173 = torch.aten.view %result0_63, %1172 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1174 = torch.aten.mm %1173, %1171 : !torch.tensor, !torch.tensor -> !torch.tensor
%1175 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1176 = torch.operator "aten._unsafe_view"(%1174, %1175) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1177 = torch.aten.add_.Tensor %1176, %arg59, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection with %arg58 / %arg57 (key role), then head split.
%1178 = torch.aten.t %arg58 : !torch.tensor -> !torch.tensor
%1179 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1180 = torch.aten.view %result0_63, %1179 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1181 = torch.aten.mm %1180, %1178 : !torch.tensor, !torch.tensor -> !torch.tensor
%1182 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1183 = torch.operator "aten._unsafe_view"(%1181, %1182) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1184 = torch.aten.add_.Tensor %1183, %arg57, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1185 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1186 = torch.aten.view %1184, %1185 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1187 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1188 = torch.aten.permute %1186, %1187 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection with %arg62 / %arg61 (value role), then head split.
%1189 = torch.aten.t %arg62 : !torch.tensor -> !torch.tensor
%1190 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1191 = torch.aten.view %result0_63, %1190 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1192 = torch.aten.mm %1191, %1189 : !torch.tensor, !torch.tensor -> !torch.tensor
%1193 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1194 = torch.operator "aten._unsafe_view"(%1192, %1193) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1195 = torch.aten.add_.Tensor %1194, %arg61, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1196 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1197 = torch.aten.view %1195, %1196 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1198 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1199 = torch.aten.permute %1197, %1198 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Head split of the query-role projection %1177.
%1200 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1201 = torch.aten.view %1177, %1200 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1202 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1203 = torch.aten.permute %1201, %1202 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Attention scores: bmm of %1203 with the transposed %1188, divided by %1, offset by %0, softmaxed, then randomly masked.
// NOTE(review): shape remarks assume each %intN / %floatN constant holds the value in its name.
// Swap the last two dims of %1188 so it can be the right-hand bmm operand.
%1204 = torch.aten.transpose.int %1188, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left operand: expand, clone, collapse the two leading dims (presumably to [48, 512, 64]).
%1205 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1206 = torch.aten.expand %1203, %1205, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1207 = torch.operator "aten.clone"(%1206, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1208 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1209 = torch.operator "aten._unsafe_view"(%1207, %1208) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right operand: same sequence on the transposed tensor (presumably to [48, 64, 512]).
%1210 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1211 = torch.aten.expand %1204, %1210, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1212 = torch.operator "aten.clone"(%1211, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1213 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1214 = torch.operator "aten._unsafe_view"(%1212, %1213) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1215 = torch.aten.bmm %1209, %1214 : !torch.tensor, !torch.tensor -> !torch.tensor
%1216 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1217 = torch.operator "aten._unsafe_view"(%1215, %1216) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by the scalar f64 tensor %1 (presumably the sqrt(head_dim) scale), then add %0 (typed [4,1,1,512] f32; presumably an additive mask).
%1218 = torch.aten.div.Tensor %1217, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1219 = torch.aten.add.Tensor %1218, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
// Softmax over the last dimension.
%1220 = torch.aten._softmax %1219, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Bernoulli mask, rescaled by the same constant and multiplied into the probabilities.
%1221 = torch.aten.empty_like %1220, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1222 = torch.operator "aten.bernoulli_.float"(%1221, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1223 = torch.aten.div_.Scalar %1222, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1224 = torch.aten.mul.Tensor %1220, %1223 : !torch.tensor, !torch.tensor -> !torch.tensor
// Context computation: bmm of the masked attention probabilities %1224 with the head-split tensor %1199, then merge heads.
// NOTE(review): shape remarks assume each %intN constant holds the value in its name.
// Left operand: expand %1224 and collapse the two leading dims (presumably to [48, 512, 512]).
%1225 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1226 = torch.aten.expand %1224, %1225, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1227 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1228 = torch.aten.view %1226, %1227 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right operand: expand, clone, and collapse %1199 (presumably to [48, 512, 64]).
%1229 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1230 = torch.aten.expand %1199, %1229, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1231 = torch.operator "aten.clone"(%1230, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1232 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1233 = torch.operator "aten._unsafe_view"(%1231, %1232) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1234 = torch.aten.bmm %1228, %1233 : !torch.tensor, !torch.tensor -> !torch.tensor
// Back to four dims, undo the head permutation with (0, 2, 1, 3), clone, and merge the last two dims into 768.
%1235 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1236 = torch.operator "aten._unsafe_view"(%1234, %1235) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1237 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1238 = torch.aten.permute %1236, %1237 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1239 = torch.operator "aten.clone"(%1238, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1240 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1241 = torch.aten.view %1239, %1240 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Attention output stage: linear projection of %1241 (transposed %arg56, plus %arg55), dropout-style masking,
// residual add with %result0_63, and layer norm producing %result0_66.
// NOTE(review): shape remarks assume each %intN / %floatN constant holds the value in its name.
%1242 = torch.aten.t %arg56 : !torch.tensor -> !torch.tensor
%1243 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1244 = torch.aten.view %1241, %1243 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1245 = torch.aten.mm %1244, %1242 : !torch.tensor, !torch.tensor -> !torch.tensor
%1246 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1247 = torch.operator "aten._unsafe_view"(%1245, %1246) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1248 = torch.aten.add_.Tensor %1247, %arg55, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Bernoulli mask, rescaled by the same constant and multiplied into the projection output.
%1249 = torch.aten.empty_like %1248, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1250 = torch.operator "aten.bernoulli_.float"(%1249, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1251 = torch.aten.div_.Scalar %1250, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1252 = torch.aten.mul.Tensor %1248, %1251 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual connection: add the layer input %result0_63.
%1253 = torch.aten.add.Tensor %1252, %result0_63, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Layer norm over the last dimension; %arg54 and %arg53 sit in the weight and bias operand slots.
%1254 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_66, %result1_67, %result2_68 = torch.aten.native_layer_norm %1253, %1254, %arg54, %arg53, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward stage on %result0_66: linear (%arg64 / %arg63) -> gelu -> linear (%arg68 / %arg67)
// -> dropout-style masking -> residual add -> layer norm, producing %result0_69.
// NOTE(review): widths 768 -> 3072 -> 768 assume each %intN constant holds the value in its name.
// First linear: flatten to 2-D, mm with the transposed weight, reshape to [.., 3072], add %arg63.
%1255 = torch.aten.t %arg64 : !torch.tensor -> !torch.tensor
%1256 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1257 = torch.aten.view %result0_66, %1256 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1258 = torch.aten.mm %1257, %1255 : !torch.tensor, !torch.tensor -> !torch.tensor
%1259 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1260 = torch.operator "aten._unsafe_view"(%1258, %1259) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1261 = torch.aten.add_.Tensor %1260, %arg63, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1262 = torch.aten.gelu %1261 : !torch.tensor -> !torch.tensor
// Second linear: flatten to [2048, 3072], mm with the transposed %arg68, reshape, add %arg67.
%1263 = torch.aten.t %arg68 : !torch.tensor -> !torch.tensor
%1264 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1265 = torch.aten.view %1262, %1264 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1266 = torch.aten.mm %1265, %1263 : !torch.tensor, !torch.tensor -> !torch.tensor
%1267 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1268 = torch.operator "aten._unsafe_view"(%1266, %1267) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1269 = torch.aten.add_.Tensor %1268, %arg67, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Bernoulli mask, rescaled by the same constant and multiplied into the output.
%1270 = torch.aten.empty_like %1269, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1271 = torch.operator "aten.bernoulli_.float"(%1270, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1272 = torch.aten.div_.Scalar %1271, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1273 = torch.aten.mul.Tensor %1269, %1272 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with the stage input %result0_66, then layer norm (%arg66 / %arg65 in the weight / bias slots).
%1274 = torch.aten.add.Tensor %1273, %result0_66, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1275 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_69, %result1_70, %result2_71 = torch.aten.native_layer_norm %1274, %1275, %arg66, %arg65, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1276 = torch.aten.t %arg203 : !torch.tensor -> !torch.tensor
%1277 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1278 = torch.aten.view %result0_69, %1277 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1279 = torch.aten.mm %1278, %1276 : !torch.tensor, !torch.tensor -> !torch.tensor
%1280 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1281 = torch.operator "aten._unsafe_view"(%1279, %1280) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1282 = torch.aten.add_.Tensor %1281, %arg202, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1283 = torch.aten.gelu %1282 : !torch.tensor -> !torch.tensor
%1284 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_72, %result1_73, %result2_74 = torch.aten.native_layer_norm %1283, %1284, %arg201, %arg200, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1285 = torch.aten.t %arg199 : !torch.tensor -> !torch.tensor
%1286 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1287 = torch.aten.view %result0_72, %1286 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1288 = torch.aten.mm %1287, %1285 : !torch.tensor, !torch.tensor -> !torch.tensor
%1289 = torch.prim.ListConstruct %int4, %int512, %int30522 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1290 = torch.operator "aten._unsafe_view"(%1288, %1289) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1291 = torch.aten.add_.Tensor %1290, %arg198, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1292 = torch.prim.ListConstruct %int-1, %int30522 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1293 = torch.aten.view %1291, %1292 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1294 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<!torch.int>
%1295 = torch.aten.view %arg207, %1294 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1296 = torch.operator "aten._log_softmax"(%1293, %int1, %false) : (!torch.tensor, !torch.int, !torch.bool) -> !torch.tensor
%output, %total_weight = torch.aten.nll_loss_forward %1296, %1295, %none, %int1, %int-100 : !torch.tensor, !torch.tensor, !torch.none, !torch.int, !torch.int -> !torch.tensor, !torch.tensor
%1297 = torch.aten.transpose.int %1228, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1298 = torch.aten.transpose.int %1233, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1299 = torch.aten.transpose.int %1209, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1300 = torch.aten.transpose.int %1214, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1301 = torch.aten.transpose.int %1123, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1302 = torch.aten.transpose.int %1128, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1303 = torch.aten.transpose.int %1104, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1304 = torch.aten.transpose.int %1109, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1305 = torch.aten.transpose.int %1018, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1306 = torch.aten.transpose.int %1023, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1307 = torch.aten.transpose.int %999, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1308 = torch.aten.transpose.int %1004, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1309 = torch.aten.transpose.int %913, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1310 = torch.aten.transpose.int %918, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1311 = torch.aten.transpose.int %894, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1312 = torch.aten.transpose.int %899, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1313 = torch.aten.transpose.int %808, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1314 = torch.aten.transpose.int %813, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1315 = torch.aten.transpose.int %789, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1316 = torch.aten.transpose.int %794, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1317 = torch.aten.transpose.int %703, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1318 = torch.aten.transpose.int %708, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1319 = torch.aten.transpose.int %684, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1320 = torch.aten.transpose.int %689, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1321 = torch.aten.transpose.int %598, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1322 = torch.aten.transpose.int %603, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1323 = torch.aten.transpose.int %579, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1324 = torch.aten.transpose.int %584, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1325 = torch.aten.transpose.int %493, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1326 = torch.aten.transpose.int %498, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1327 = torch.aten.transpose.int %474, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1328 = torch.aten.transpose.int %479, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1329 = torch.aten.transpose.int %388, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1330 = torch.aten.transpose.int %393, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1331 = torch.aten.transpose.int %369, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1332 = torch.aten.transpose.int %374, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1333 = torch.aten.transpose.int %283, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1334 = torch.aten.transpose.int %288, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1335 = torch.aten.transpose.int %264, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1336 = torch.aten.transpose.int %269, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1337 = torch.aten.transpose.int %178, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1338 = torch.aten.transpose.int %183, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1339 = torch.aten.transpose.int %159, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1340 = torch.aten.transpose.int %164, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1341 = torch.aten.transpose.int %73, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1342 = torch.aten.transpose.int %78, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1343 = torch.aten.transpose.int %54, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1344 = torch.aten.transpose.int %59, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1345 = torch.prim.ListConstruct %output, %1291, %306, %result2_20, %arg98, %result2_50, %1013, %1274, %arg36, %result1_16, %824, %590, %1075, %arg120, %arg37, %445, %result2_5, %arg161, %arg26, %1337, %1244, %result2_11, %result1_7, %arg100, %1223, %arg76, %arg136, %123, %arg94, %1296, %509, %result1_10, %arg116, %320, %593, %732, %arg177, %arg16, %539, %arg133, %arg49, %963, %result1_52, %arg70, %719, %arg40, %arg33, %arg174, %result2_2, %arg60, %arg53, %4, %1115, %203, %arg118, %arg65, %arg200, %result1_37, %arg30, %arg90, %1321, %456, %1329, %result1_46, %1152, %1313, %arg0, %96, %1331, %arg112, %655, %result2_35, %arg66, %arg130, %arg38, %arg194, %arg166, %1220, %333, %25, %1251, %327, %arg149, %result1_34, %516, %1300, %627, %518, %852, %arg188, %result1_4, %result2_56, %1333, %1302, %arg158, %905, %arg44, %278, %438, %211, %result2_41, %1324, %854, %arg152, %1334, %858, %arg81, %876, %488, %arg28, %result2_44, %arg181, %117, %result1_22, %1086, %arg150, %result1_49, %380, %arg145, %1315, %arg140, %1307, %5, %result2_47, %arg192, %946, %1323, %arg92, %621, %arg21, %1325, %arg42, %arg186, %result2_53, %614, %1339, %215, %981, %result2_23, %result1_28, %14, %arg138, %arg193, %arg154, %arg203, %65, %result1_43, %arg46, %666, %1055, %1309, %arg146, %result1_73, %1156, %1328, %arg182, %arg180, %1318, %740, %1299, %383, %1327, %803, %arg104, %1322, %68, %194, %1051, %1265, %1139, %arg17, %result2_38, %222, %299, %arg106, %644, %207, %result1_70, %831, %432, %170, %arg85, %938, %957, %1343, %result1, %561, %1303, %arg74, %1068, %89, %485, %arg64, %arg190, %417, %1146, %312, %arg160, %arg124, %1338, %result1_13, %result2_68, %arg164, %arg18, %425, %1341, %970, %arg68, %result2_8, %arg101, %result2_17, %arg1, %942, %1278, %arg88, %275, %411, %434, %1312, %arg128, %1314, %arg80, %749, %arg62, %1010, %1160, %1272, %1173, %1298, %arg110, %result1_55, %543, %800, %arg184, %929, %arg178, %arg113, %arg48, %698, %1287, %result1_31, %1308, %1043, %1047, %1261, %1304, %18, 
%result2_26, %arg97, %1305, %arg14, %936, %695, %arg84, %228, %760, %173, %result2_71, %arg86, %arg134, %642, %908, %result2_65, %1335, %arg122, %arg54, %arg58, %total_weight, %771, %1332, %result1_67, %1317, %arg129, %1167, %arg132, %246, %753, %arg96, %result1_64, %result2, %1330, %result1_40, %841, %arg20, %result1_25, %1320, %329, %736, %result2_14, %result1_58, %result2_29, %1148, %98, %351, %arg10, %141, %1253, %308, %arg24, %550, %arg108, %1344, %1257, %1342, %arg69, %arg201, %arg206, %413, %arg126, %833, %arg32, %arg56, %845, %623, %537, %arg142, %result1_19, %1301, %result1_1, %201, %1310, %1340, %1326, %36, %arg78, %arg196, %1336, %10, %result2_32, %1311, %648, %421, %235, %726, %arg148, %arg199, %arg176, %arg168, %1062, %1295, %1319, %arg6, %arg162, %950, %1296, %728, %959, %result2_62, %340, %1118, %result2_74, %arg117, %1282, %119, %arg114, %1180, %arg12, %106, %arg72, %1297, %arg52, %1169, %result2_59, %result1_61, %arg50, %arg172, %arg8, %631, %865, %arg144, %130, %arg34, %1064, %635, %747, %404, %1041, %arg82, %1191, %526, %102, %837, %224, %arg170, %1034, %530, %arg165, %arg22, %arg102, %arg5, %110, %1316, %1306, %arg156, %316, %522 : (!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor) -> !torch.list<!torch.tensor>
return %1345 : !torch.list<!torch.tensor>
}
}
// -----// IR Dump After Canonicalizer //----- //
func @forward(%arg0: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg1: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg2: !torch.tensor {torch.type_bound = !torch.vtensor<[512,768],f32>}, %arg3: !torch.tensor {torch.type_bound = !torch.vtensor<[2,768],f32>}, %arg4: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg5: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg6: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg7: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg8: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg9: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg10: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg11: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg12: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg13: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg14: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg15: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg16: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg17: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg18: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg19: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg20: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg21: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg22: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg23: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg24: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg25: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg26: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg27: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768],f32>}, %arg28: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg29: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg30: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg31: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg32: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg33: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg34: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg35: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg36: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg37: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg38: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg39: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg40: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg41: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg42: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg43: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg44: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg45: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg46: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg47: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg48: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg49: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg50: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg51: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg52: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg53: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg54: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg55: !torch.tensor 
{torch.type_bound = !torch.vtensor<[768],f32>}, %arg56: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg57: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg58: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg59: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg60: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg61: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg62: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg63: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg64: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg65: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg66: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg67: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg68: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg69: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg70: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg71: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg72: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg73: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg74: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg75: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg76: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg77: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg78: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg79: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg80: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg81: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg82: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg83: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg84: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg85: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg86: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg87: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg88: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg89: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg90: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg91: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg92: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg93: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg94: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg95: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg96: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg97: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg98: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg99: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg100: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg101: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg102: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg103: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg104: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg105: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg106: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg107: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg108: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg109: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg110: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768,768],f32>}, %arg111: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg112: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg113: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg114: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg115: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg116: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg117: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg118: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg119: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg120: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg121: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg122: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg123: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg124: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg125: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg126: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg127: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg128: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg129: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg130: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg131: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg132: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg133: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg134: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg135: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg136: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg137: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg138: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg139: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg140: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg141: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg142: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg143: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg144: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg145: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg146: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg147: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg148: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg149: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg150: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg151: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg152: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg153: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg154: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg155: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg156: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg157: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg158: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg159: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg160: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg161: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg162: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg163: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg164: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg165: !torch.tensor {torch.type_bound 
= !torch.vtensor<[768],f32>}, %arg166: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg167: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg168: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg169: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg170: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg171: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg172: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg173: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg174: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg175: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg176: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg177: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg178: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg179: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg180: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg181: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg182: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg183: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg184: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg185: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg186: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg187: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg188: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg189: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg190: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg191: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg192: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, 
%arg193: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg194: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg195: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg196: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg197: !torch.tensor {torch.type_bound = !torch.vtensor<[30522],f32>}, %arg198: !torch.tensor {torch.type_bound = !torch.vtensor<[30522],f32>}, %arg199: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg200: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg201: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg202: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg203: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg204: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg205: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg206: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}, %arg207: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}) -> !torch.list<!torch.tensor> {
%int-100 = torch.constant.int -100
%int30522 = torch.constant.int 30522
%int3072 = torch.constant.int 3072
%int48 = torch.constant.int 48
%int-2 = torch.constant.int -2
%int-1 = torch.constant.int -1
%int3 = torch.constant.int 3
%int1 = torch.constant.int 1
%int2 = torch.constant.int 2
%int64 = torch.constant.int 64
%int12 = torch.constant.int 12
%int2048 = torch.constant.int 2048
%float9.000000e-01 = torch.constant.float 9.000000e-01
%float9.999990e-13 = torch.constant.float 9.9999999999999998E-13
%int768 = torch.constant.int 768
%int512 = torch.constant.int 512
%int4 = torch.constant.int 4
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int0 = torch.constant.int 0
%false = torch.constant.bool false
%none = torch.constant.none
%0 = torch.tensor.literal(dense<-0.000000e+00> : tensor<4x1x1x512xf32>) : !torch.tensor<[4,1,1,512],f32>
%1 = torch.tensor.literal(dense<8.000000e+00> : tensor<f64>) : !torch.tensor<[],f64>
%2 = torch.aten.slice.Tensor %arg205, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%3 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%4 = torch.aten.expand %2, %3, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%5 = torch.aten.slice.Tensor %arg204, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%6 = torch.aten.embedding %arg4, %arg206, %int0, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%7 = torch.aten.embedding %arg3, %4, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%8 = torch.aten.add.Tensor %6, %7, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%9 = torch.aten.embedding %arg2, %5, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%10 = torch.aten.add_.Tensor %8, %9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%11 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0, %result1, %result2 = torch.aten.native_layer_norm %10, %11, %arg1, %arg0, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%12 = torch.aten.empty_like %result0, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%13 = torch.operator "aten.bernoulli_.float"(%12, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%14 = torch.aten.div_.Scalar %13, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%15 = torch.aten.mul.Tensor %result0, %14 : !torch.tensor, !torch.tensor -> !torch.tensor
%16 = torch.aten.t %arg12 : !torch.tensor -> !torch.tensor
%17 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%18 = torch.aten.view %15, %17 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%19 = torch.aten.mm %18, %16 : !torch.tensor, !torch.tensor -> !torch.tensor
%20 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%21 = torch.operator "aten._unsafe_view"(%19, %20) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%22 = torch.aten.add_.Tensor %21, %arg11, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%23 = torch.aten.t %arg10 : !torch.tensor -> !torch.tensor
%24 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%25 = torch.aten.view %15, %24 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%26 = torch.aten.mm %25, %23 : !torch.tensor, !torch.tensor -> !torch.tensor
%27 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%28 = torch.operator "aten._unsafe_view"(%26, %27) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%29 = torch.aten.add_.Tensor %28, %arg9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%30 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%31 = torch.aten.view %29, %30 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%32 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%33 = torch.aten.permute %31, %32 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%34 = torch.aten.t %arg14 : !torch.tensor -> !torch.tensor
%35 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%36 = torch.aten.view %15, %35 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%37 = torch.aten.mm %36, %34 : !torch.tensor, !torch.tensor -> !torch.tensor
%38 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%39 = torch.operator "aten._unsafe_view"(%37, %38) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%40 = torch.aten.add_.Tensor %39, %arg13, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%41 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%42 = torch.aten.view %40, %41 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%43 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%44 = torch.aten.permute %42, %43 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%45 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%46 = torch.aten.view %22, %45 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%47 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%48 = torch.aten.permute %46, %47 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%49 = torch.aten.transpose.int %33, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%50 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%51 = torch.aten.expand %48, %50, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%52 = torch.operator "aten.clone"(%51, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%53 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%54 = torch.operator "aten._unsafe_view"(%52, %53) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%55 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%56 = torch.aten.expand %49, %55, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%57 = torch.operator "aten.clone"(%56, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%58 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%59 = torch.operator "aten._unsafe_view"(%57, %58) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%60 = torch.aten.bmm %54, %59 : !torch.tensor, !torch.tensor -> !torch.tensor
%61 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%62 = torch.operator "aten._unsafe_view"(%60, %61) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%63 = torch.aten.div.Tensor %62, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%64 = torch.aten.add.Tensor %63, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%65 = torch.aten._softmax %64, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%66 = torch.aten.empty_like %65, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%67 = torch.operator "aten.bernoulli_.float"(%66, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%68 = torch.aten.div_.Scalar %67, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%69 = torch.aten.mul.Tensor %65, %68 : !torch.tensor, !torch.tensor -> !torch.tensor
%70 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%71 = torch.aten.expand %69, %70, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%72 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%73 = torch.aten.view %71, %72 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%74 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%75 = torch.aten.expand %44, %74, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%76 = torch.operator "aten.clone"(%75, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%77 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%78 = torch.operator "aten._unsafe_view"(%76, %77) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%79 = torch.aten.bmm %73, %78 : !torch.tensor, !torch.tensor -> !torch.tensor
%80 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%81 = torch.operator "aten._unsafe_view"(%79, %80) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%82 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%83 = torch.aten.permute %81, %82 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%84 = torch.operator "aten.clone"(%83, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%85 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%86 = torch.aten.view %84, %85 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%87 = torch.aten.t %arg8 : !torch.tensor -> !torch.tensor
%88 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%89 = torch.aten.view %86, %88 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%90 = torch.aten.mm %89, %87 : !torch.tensor, !torch.tensor -> !torch.tensor
%91 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%92 = torch.operator "aten._unsafe_view"(%90, %91) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%93 = torch.aten.add_.Tensor %92, %arg7, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%94 = torch.aten.empty_like %93, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%95 = torch.operator "aten.bernoulli_.float"(%94, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%96 = torch.aten.div_.Scalar %95, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%97 = torch.aten.mul.Tensor %93, %96 : !torch.tensor, !torch.tensor -> !torch.tensor
%98 = torch.aten.add.Tensor %97, %15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%99 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_0, %result1_1, %result2_2 = torch.aten.native_layer_norm %98, %99, %arg6, %arg5, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%100 = torch.aten.t %arg16 : !torch.tensor -> !torch.tensor
%101 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%102 = torch.aten.view %result0_0, %101 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%103 = torch.aten.mm %102, %100 : !torch.tensor, !torch.tensor -> !torch.tensor
%104 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%105 = torch.operator "aten._unsafe_view"(%103, %104) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%106 = torch.aten.add_.Tensor %105, %arg15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%107 = torch.aten.gelu %106 : !torch.tensor -> !torch.tensor
%108 = torch.aten.t %arg20 : !torch.tensor -> !torch.tensor
%109 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%110 = torch.aten.view %107, %109 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%111 = torch.aten.mm %110, %108 : !torch.tensor, !torch.tensor -> !torch.tensor
%112 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%113 = torch.operator "aten._unsafe_view"(%111, %112) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%114 = torch.aten.add_.Tensor %113, %arg19, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%115 = torch.aten.empty_like %114, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%116 = torch.operator "aten.bernoulli_.float"(%115, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%117 = torch.aten.div_.Scalar %116, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%118 = torch.aten.mul.Tensor %114, %117 : !torch.tensor, !torch.tensor -> !torch.tensor
%119 = torch.aten.add.Tensor %118, %result0_0, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%120 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_3, %result1_4, %result2_5 = torch.aten.native_layer_norm %119, %120, %arg18, %arg17, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%121 = torch.aten.t %arg28 : !torch.tensor -> !torch.tensor
%122 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%123 = torch.aten.view %result0_3, %122 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%124 = torch.aten.mm %123, %121 : !torch.tensor, !torch.tensor -> !torch.tensor
%125 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%126 = torch.operator "aten._unsafe_view"(%124, %125) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%127 = torch.aten.add_.Tensor %126, %arg27, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%128 = torch.aten.t %arg26 : !torch.tensor -> !torch.tensor
%129 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%130 = torch.aten.view %result0_3, %129 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%131 = torch.aten.mm %130, %128 : !torch.tensor, !torch.tensor -> !torch.tensor
%132 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%133 = torch.operator "aten._unsafe_view"(%131, %132) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%134 = torch.aten.add_.Tensor %133, %arg25, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%135 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%136 = torch.aten.view %134, %135 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%137 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%138 = torch.aten.permute %136, %137 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%139 = torch.aten.t %arg30 : !torch.tensor -> !torch.tensor
%140 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%141 = torch.aten.view %result0_3, %140 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%142 = torch.aten.mm %141, %139 : !torch.tensor, !torch.tensor -> !torch.tensor
%143 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%144 = torch.operator "aten._unsafe_view"(%142, %143) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%145 = torch.aten.add_.Tensor %144, %arg29, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%146 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%147 = torch.aten.view %145, %146 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%148 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%149 = torch.aten.permute %147, %148 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%150 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%151 = torch.aten.view %127, %150 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%152 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%153 = torch.aten.permute %151, %152 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%154 = torch.aten.transpose.int %138, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%155 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%156 = torch.aten.expand %153, %155, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%157 = torch.operator "aten.clone"(%156, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%158 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%159 = torch.operator "aten._unsafe_view"(%157, %158) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%160 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%161 = torch.aten.expand %154, %160, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%162 = torch.operator "aten.clone"(%161, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%163 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%164 = torch.operator "aten._unsafe_view"(%162, %163) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%165 = torch.aten.bmm %159, %164 : !torch.tensor, !torch.tensor -> !torch.tensor
%166 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%167 = torch.operator "aten._unsafe_view"(%165, %166) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%168 = torch.aten.div.Tensor %167, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%169 = torch.aten.add.Tensor %168, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%170 = torch.aten._softmax %169, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%171 = torch.aten.empty_like %170, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%172 = torch.operator "aten.bernoulli_.float"(%171, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%173 = torch.aten.div_.Scalar %172, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%174 = torch.aten.mul.Tensor %170, %173 : !torch.tensor, !torch.tensor -> !torch.tensor
%175 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%176 = torch.aten.expand %174, %175, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%177 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%178 = torch.aten.view %176, %177 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%179 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%180 = torch.aten.expand %149, %179, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%181 = torch.operator "aten.clone"(%180, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%182 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%183 = torch.operator "aten._unsafe_view"(%181, %182) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%184 = torch.aten.bmm %178, %183 : !torch.tensor, !torch.tensor -> !torch.tensor
%185 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%186 = torch.operator "aten._unsafe_view"(%184, %185) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%187 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%188 = torch.aten.permute %186, %187 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%189 = torch.operator "aten.clone"(%188, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%190 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%191 = torch.aten.view %189, %190 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%192 = torch.aten.t %arg24 : !torch.tensor -> !torch.tensor
%193 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%194 = torch.aten.view %191, %193 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%195 = torch.aten.mm %194, %192 : !torch.tensor, !torch.tensor -> !torch.tensor
%196 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%197 = torch.operator "aten._unsafe_view"(%195, %196) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%198 = torch.aten.add_.Tensor %197, %arg23, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%199 = torch.aten.empty_like %198, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%200 = torch.operator "aten.bernoulli_.float"(%199, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%201 = torch.aten.div_.Scalar %200, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%202 = torch.aten.mul.Tensor %198, %201 : !torch.tensor, !torch.tensor -> !torch.tensor
%203 = torch.aten.add.Tensor %202, %result0_3, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%204 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_6, %result1_7, %result2_8 = torch.aten.native_layer_norm %203, %204, %arg22, %arg21, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%205 = torch.aten.t %arg32 : !torch.tensor -> !torch.tensor
%206 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%207 = torch.aten.view %result0_6, %206 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%208 = torch.aten.mm %207, %205 : !torch.tensor, !torch.tensor -> !torch.tensor
%209 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%210 = torch.operator "aten._unsafe_view"(%208, %209) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%211 = torch.aten.add_.Tensor %210, %arg31, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%212 = torch.aten.gelu %211 : !torch.tensor -> !torch.tensor
%213 = torch.aten.t %arg36 : !torch.tensor -> !torch.tensor
%214 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%215 = torch.aten.view %212, %214 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%216 = torch.aten.mm %215, %213 : !torch.tensor, !torch.tensor -> !torch.tensor
%217 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%218 = torch.operator "aten._unsafe_view"(%216, %217) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%219 = torch.aten.add_.Tensor %218, %arg35, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%220 = torch.aten.empty_like %219, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%221 = torch.operator "aten.bernoulli_.float"(%220, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%222 = torch.aten.div_.Scalar %221, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%223 = torch.aten.mul.Tensor %219, %222 : !torch.tensor, !torch.tensor -> !torch.tensor
%224 = torch.aten.add.Tensor %223, %result0_6, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%225 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_9, %result1_10, %result2_11 = torch.aten.native_layer_norm %224, %225, %arg34, %arg33, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%226 = torch.aten.t %arg76 : !torch.tensor -> !torch.tensor
%227 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%228 = torch.aten.view %result0_9, %227 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%229 = torch.aten.mm %228, %226 : !torch.tensor, !torch.tensor -> !torch.tensor
%230 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%231 = torch.operator "aten._unsafe_view"(%229, %230) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%232 = torch.aten.add_.Tensor %231, %arg75, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%233 = torch.aten.t %arg74 : !torch.tensor -> !torch.tensor
%234 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%235 = torch.aten.view %result0_9, %234 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%236 = torch.aten.mm %235, %233 : !torch.tensor, !torch.tensor -> !torch.tensor
%237 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%238 = torch.operator "aten._unsafe_view"(%236, %237) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%239 = torch.aten.add_.Tensor %238, %arg73, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%240 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%241 = torch.aten.view %239, %240 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%242 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%243 = torch.aten.permute %241, %242 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%244 = torch.aten.t %arg78 : !torch.tensor -> !torch.tensor
%245 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%246 = torch.aten.view %result0_9, %245 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%247 = torch.aten.mm %246, %244 : !torch.tensor, !torch.tensor -> !torch.tensor
%248 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%249 = torch.operator "aten._unsafe_view"(%247, %248) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%250 = torch.aten.add_.Tensor %249, %arg77, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%251 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%252 = torch.aten.view %250, %251 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%253 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%254 = torch.aten.permute %252, %253 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%255 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%256 = torch.aten.view %232, %255 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%257 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%258 = torch.aten.permute %256, %257 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%259 = torch.aten.transpose.int %243, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%260 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%261 = torch.aten.expand %258, %260, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%262 = torch.operator "aten.clone"(%261, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%263 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%264 = torch.operator "aten._unsafe_view"(%262, %263) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%265 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%266 = torch.aten.expand %259, %265, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%267 = torch.operator "aten.clone"(%266, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%268 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%269 = torch.operator "aten._unsafe_view"(%267, %268) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%270 = torch.aten.bmm %264, %269 : !torch.tensor, !torch.tensor -> !torch.tensor
%271 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%272 = torch.operator "aten._unsafe_view"(%270, %271) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%273 = torch.aten.div.Tensor %272, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%274 = torch.aten.add.Tensor %273, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%275 = torch.aten._softmax %274, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%276 = torch.aten.empty_like %275, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%277 = torch.operator "aten.bernoulli_.float"(%276, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%278 = torch.aten.div_.Scalar %277, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%279 = torch.aten.mul.Tensor %275, %278 : !torch.tensor, !torch.tensor -> !torch.tensor
%280 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%281 = torch.aten.expand %279, %280, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%282 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%283 = torch.aten.view %281, %282 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%284 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%285 = torch.aten.expand %254, %284, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%286 = torch.operator "aten.clone"(%285, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%287 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%288 = torch.operator "aten._unsafe_view"(%286, %287) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%289 = torch.aten.bmm %283, %288 : !torch.tensor, !torch.tensor -> !torch.tensor
%290 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%291 = torch.operator "aten._unsafe_view"(%289, %290) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%292 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%293 = torch.aten.permute %291, %292 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%294 = torch.operator "aten.clone"(%293, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%295 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%296 = torch.aten.view %294, %295 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%297 = torch.aten.t %arg72 : !torch.tensor -> !torch.tensor
%298 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%299 = torch.aten.view %296, %298 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%300 = torch.aten.mm %299, %297 : !torch.tensor, !torch.tensor -> !torch.tensor
%301 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%302 = torch.operator "aten._unsafe_view"(%300, %301) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%303 = torch.aten.add_.Tensor %302, %arg71, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%304 = torch.aten.empty_like %303, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%305 = torch.operator "aten.bernoulli_.float"(%304, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%306 = torch.aten.div_.Scalar %305, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%307 = torch.aten.mul.Tensor %303, %306 : !torch.tensor, !torch.tensor -> !torch.tensor
%308 = torch.aten.add.Tensor %307, %result0_9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%309 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_12, %result1_13, %result2_14 = torch.aten.native_layer_norm %308, %309, %arg70, %arg69, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%310 = torch.aten.t %arg80 : !torch.tensor -> !torch.tensor
%311 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%312 = torch.aten.view %result0_12, %311 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%313 = torch.aten.mm %312, %310 : !torch.tensor, !torch.tensor -> !torch.tensor
%314 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%315 = torch.operator "aten._unsafe_view"(%313, %314) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%316 = torch.aten.add_.Tensor %315, %arg79, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%317 = torch.aten.gelu %316 : !torch.tensor -> !torch.tensor
%318 = torch.aten.t %arg84 : !torch.tensor -> !torch.tensor
%319 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%320 = torch.aten.view %317, %319 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%321 = torch.aten.mm %320, %318 : !torch.tensor, !torch.tensor -> !torch.tensor
%322 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%323 = torch.operator "aten._unsafe_view"(%321, %322) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%324 = torch.aten.add_.Tensor %323, %arg83, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%325 = torch.aten.empty_like %324, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%326 = torch.operator "aten.bernoulli_.float"(%325, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%327 = torch.aten.div_.Scalar %326, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%328 = torch.aten.mul.Tensor %324, %327 : !torch.tensor, !torch.tensor -> !torch.tensor
%329 = torch.aten.add.Tensor %328, %result0_12, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%330 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_15, %result1_16, %result2_17 = torch.aten.native_layer_norm %329, %330, %arg82, %arg81, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%331 = torch.aten.t %arg92 : !torch.tensor -> !torch.tensor
%332 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%333 = torch.aten.view %result0_15, %332 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%334 = torch.aten.mm %333, %331 : !torch.tensor, !torch.tensor -> !torch.tensor
%335 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%336 = torch.operator "aten._unsafe_view"(%334, %335) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%337 = torch.aten.add_.Tensor %336, %arg91, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%338 = torch.aten.t %arg90 : !torch.tensor -> !torch.tensor
%339 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%340 = torch.aten.view %result0_15, %339 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%341 = torch.aten.mm %340, %338 : !torch.tensor, !torch.tensor -> !torch.tensor
%342 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%343 = torch.operator "aten._unsafe_view"(%341, %342) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%344 = torch.aten.add_.Tensor %343, %arg89, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%345 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%346 = torch.aten.view %344, %345 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%347 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%348 = torch.aten.permute %346, %347 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%349 = torch.aten.t %arg94 : !torch.tensor -> !torch.tensor
%350 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%351 = torch.aten.view %result0_15, %350 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%352 = torch.aten.mm %351, %349 : !torch.tensor, !torch.tensor -> !torch.tensor
%353 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%354 = torch.operator "aten._unsafe_view"(%352, %353) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%355 = torch.aten.add_.Tensor %354, %arg93, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%356 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%357 = torch.aten.view %355, %356 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%358 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%359 = torch.aten.permute %357, %358 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%360 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%361 = torch.aten.view %337, %360 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%362 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%363 = torch.aten.permute %361, %362 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%364 = torch.aten.transpose.int %348, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%365 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%366 = torch.aten.expand %363, %365, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%367 = torch.operator "aten.clone"(%366, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%368 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%369 = torch.operator "aten._unsafe_view"(%367, %368) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%370 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%371 = torch.aten.expand %364, %370, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%372 = torch.operator "aten.clone"(%371, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%373 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%374 = torch.operator "aten._unsafe_view"(%372, %373) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%375 = torch.aten.bmm %369, %374 : !torch.tensor, !torch.tensor -> !torch.tensor
%376 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%377 = torch.operator "aten._unsafe_view"(%375, %376) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%378 = torch.aten.div.Tensor %377, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%379 = torch.aten.add.Tensor %378, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%380 = torch.aten._softmax %379, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%381 = torch.aten.empty_like %380, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%382 = torch.operator "aten.bernoulli_.float"(%381, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%383 = torch.aten.div_.Scalar %382, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%384 = torch.aten.mul.Tensor %380, %383 : !torch.tensor, !torch.tensor -> !torch.tensor
%385 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%386 = torch.aten.expand %384, %385, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%387 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%388 = torch.aten.view %386, %387 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%389 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%390 = torch.aten.expand %359, %389, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%391 = torch.operator "aten.clone"(%390, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%392 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%393 = torch.operator "aten._unsafe_view"(%391, %392) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%394 = torch.aten.bmm %388, %393 : !torch.tensor, !torch.tensor -> !torch.tensor
%395 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%396 = torch.operator "aten._unsafe_view"(%394, %395) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%397 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%398 = torch.aten.permute %396, %397 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%399 = torch.operator "aten.clone"(%398, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%400 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%401 = torch.aten.view %399, %400 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%402 = torch.aten.t %arg88 : !torch.tensor -> !torch.tensor
%403 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%404 = torch.aten.view %401, %403 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%405 = torch.aten.mm %404, %402 : !torch.tensor, !torch.tensor -> !torch.tensor
%406 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%407 = torch.operator "aten._unsafe_view"(%405, %406) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%408 = torch.aten.add_.Tensor %407, %arg87, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%409 = torch.aten.empty_like %408, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%410 = torch.operator "aten.bernoulli_.float"(%409, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%411 = torch.aten.div_.Scalar %410, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%412 = torch.aten.mul.Tensor %408, %411 : !torch.tensor, !torch.tensor -> !torch.tensor
%413 = torch.aten.add.Tensor %412, %result0_15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%414 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_18, %result1_19, %result2_20 = torch.aten.native_layer_norm %413, %414, %arg86, %arg85, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%415 = torch.aten.t %arg96 : !torch.tensor -> !torch.tensor
%416 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%417 = torch.aten.view %result0_18, %416 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%418 = torch.aten.mm %417, %415 : !torch.tensor, !torch.tensor -> !torch.tensor
%419 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%420 = torch.operator "aten._unsafe_view"(%418, %419) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%421 = torch.aten.add_.Tensor %420, %arg95, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%422 = torch.aten.gelu %421 : !torch.tensor -> !torch.tensor
%423 = torch.aten.t %arg100 : !torch.tensor -> !torch.tensor
%424 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%425 = torch.aten.view %422, %424 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%426 = torch.aten.mm %425, %423 : !torch.tensor, !torch.tensor -> !torch.tensor
%427 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%428 = torch.operator "aten._unsafe_view"(%426, %427) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%429 = torch.aten.add_.Tensor %428, %arg99, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%430 = torch.aten.empty_like %429, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%431 = torch.operator "aten.bernoulli_.float"(%430, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%432 = torch.aten.div_.Scalar %431, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%433 = torch.aten.mul.Tensor %429, %432 : !torch.tensor, !torch.tensor -> !torch.tensor
%434 = torch.aten.add.Tensor %433, %result0_18, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%435 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_21, %result1_22, %result2_23 = torch.aten.native_layer_norm %434, %435, %arg98, %arg97, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%436 = torch.aten.t %arg108 : !torch.tensor -> !torch.tensor
%437 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%438 = torch.aten.view %result0_21, %437 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%439 = torch.aten.mm %438, %436 : !torch.tensor, !torch.tensor -> !torch.tensor
%440 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%441 = torch.operator "aten._unsafe_view"(%439, %440) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%442 = torch.aten.add_.Tensor %441, %arg107, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%443 = torch.aten.t %arg106 : !torch.tensor -> !torch.tensor
%444 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%445 = torch.aten.view %result0_21, %444 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%446 = torch.aten.mm %445, %443 : !torch.tensor, !torch.tensor -> !torch.tensor
%447 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%448 = torch.operator "aten._unsafe_view"(%446, %447) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%449 = torch.aten.add_.Tensor %448, %arg105, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%450 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%451 = torch.aten.view %449, %450 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%452 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%453 = torch.aten.permute %451, %452 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%454 = torch.aten.t %arg110 : !torch.tensor -> !torch.tensor
%455 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%456 = torch.aten.view %result0_21, %455 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%457 = torch.aten.mm %456, %454 : !torch.tensor, !torch.tensor -> !torch.tensor
%458 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%459 = torch.operator "aten._unsafe_view"(%457, %458) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%460 = torch.aten.add_.Tensor %459, %arg109, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%461 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%462 = torch.aten.view %460, %461 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%463 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%464 = torch.aten.permute %462, %463 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%465 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%466 = torch.aten.view %442, %465 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%467 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%468 = torch.aten.permute %466, %467 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%469 = torch.aten.transpose.int %453, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%470 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%471 = torch.aten.expand %468, %470, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%472 = torch.operator "aten.clone"(%471, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%473 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%474 = torch.operator "aten._unsafe_view"(%472, %473) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%475 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%476 = torch.aten.expand %469, %475, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%477 = torch.operator "aten.clone"(%476, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%478 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%479 = torch.operator "aten._unsafe_view"(%477, %478) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%480 = torch.aten.bmm %474, %479 : !torch.tensor, !torch.tensor -> !torch.tensor
%481 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%482 = torch.operator "aten._unsafe_view"(%480, %481) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%483 = torch.aten.div.Tensor %482, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%484 = torch.aten.add.Tensor %483, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%485 = torch.aten._softmax %484, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%486 = torch.aten.empty_like %485, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%487 = torch.operator "aten.bernoulli_.float"(%486, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%488 = torch.aten.div_.Scalar %487, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%489 = torch.aten.mul.Tensor %485, %488 : !torch.tensor, !torch.tensor -> !torch.tensor
%490 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%491 = torch.aten.expand %489, %490, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%492 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%493 = torch.aten.view %491, %492 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%494 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%495 = torch.aten.expand %464, %494, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%496 = torch.operator "aten.clone"(%495, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%497 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%498 = torch.operator "aten._unsafe_view"(%496, %497) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%499 = torch.aten.bmm %493, %498 : !torch.tensor, !torch.tensor -> !torch.tensor
%500 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%501 = torch.operator "aten._unsafe_view"(%499, %500) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%502 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%503 = torch.aten.permute %501, %502 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%504 = torch.operator "aten.clone"(%503, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%505 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%506 = torch.aten.view %504, %505 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%507 = torch.aten.t %arg104 : !torch.tensor -> !torch.tensor
%508 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%509 = torch.aten.view %506, %508 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%510 = torch.aten.mm %509, %507 : !torch.tensor, !torch.tensor -> !torch.tensor
%511 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%512 = torch.operator "aten._unsafe_view"(%510, %511) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%513 = torch.aten.add_.Tensor %512, %arg103, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%514 = torch.aten.empty_like %513, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%515 = torch.operator "aten.bernoulli_.float"(%514, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%516 = torch.aten.div_.Scalar %515, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%517 = torch.aten.mul.Tensor %513, %516 : !torch.tensor, !torch.tensor -> !torch.tensor
%518 = torch.aten.add.Tensor %517, %result0_21, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%519 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %518, %519, %arg102, %arg101, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%520 = torch.aten.t %arg112 : !torch.tensor -> !torch.tensor
%521 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%522 = torch.aten.view %result0_24, %521 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%523 = torch.aten.mm %522, %520 : !torch.tensor, !torch.tensor -> !torch.tensor
%524 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%525 = torch.operator "aten._unsafe_view"(%523, %524) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%526 = torch.aten.add_.Tensor %525, %arg111, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%527 = torch.aten.gelu %526 : !torch.tensor -> !torch.tensor
%528 = torch.aten.t %arg116 : !torch.tensor -> !torch.tensor
%529 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%530 = torch.aten.view %527, %529 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%531 = torch.aten.mm %530, %528 : !torch.tensor, !torch.tensor -> !torch.tensor
%532 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%533 = torch.operator "aten._unsafe_view"(%531, %532) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%534 = torch.aten.add_.Tensor %533, %arg115, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%535 = torch.aten.empty_like %534, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%536 = torch.operator "aten.bernoulli_.float"(%535, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%537 = torch.aten.div_.Scalar %536, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%538 = torch.aten.mul.Tensor %534, %537 : !torch.tensor, !torch.tensor -> !torch.tensor
%539 = torch.aten.add.Tensor %538, %result0_24, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%540 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_27, %result1_28, %result2_29 = torch.aten.native_layer_norm %539, %540, %arg114, %arg113, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%541 = torch.aten.t %arg124 : !torch.tensor -> !torch.tensor
%542 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%543 = torch.aten.view %result0_27, %542 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%544 = torch.aten.mm %543, %541 : !torch.tensor, !torch.tensor -> !torch.tensor
%545 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%546 = torch.operator "aten._unsafe_view"(%544, %545) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%547 = torch.aten.add_.Tensor %546, %arg123, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%548 = torch.aten.t %arg122 : !torch.tensor -> !torch.tensor
%549 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%550 = torch.aten.view %result0_27, %549 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%551 = torch.aten.mm %550, %548 : !torch.tensor, !torch.tensor -> !torch.tensor
%552 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%553 = torch.operator "aten._unsafe_view"(%551, %552) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%554 = torch.aten.add_.Tensor %553, %arg121, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%555 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%556 = torch.aten.view %554, %555 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%557 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%558 = torch.aten.permute %556, %557 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%559 = torch.aten.t %arg126 : !torch.tensor -> !torch.tensor
%560 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%561 = torch.aten.view %result0_27, %560 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%562 = torch.aten.mm %561, %559 : !torch.tensor, !torch.tensor -> !torch.tensor
%563 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%564 = torch.operator "aten._unsafe_view"(%562, %563) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%565 = torch.aten.add_.Tensor %564, %arg125, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%566 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%567 = torch.aten.view %565, %566 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%568 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%569 = torch.aten.permute %567, %568 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%570 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%571 = torch.aten.view %547, %570 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%572 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%573 = torch.aten.permute %571, %572 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%574 = torch.aten.transpose.int %558, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%575 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%576 = torch.aten.expand %573, %575, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%577 = torch.operator "aten.clone"(%576, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%578 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%579 = torch.operator "aten._unsafe_view"(%577, %578) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%580 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%581 = torch.aten.expand %574, %580, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%582 = torch.operator "aten.clone"(%581, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%583 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%584 = torch.operator "aten._unsafe_view"(%582, %583) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%585 = torch.aten.bmm %579, %584 : !torch.tensor, !torch.tensor -> !torch.tensor
%586 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%587 = torch.operator "aten._unsafe_view"(%585, %586) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%588 = torch.aten.div.Tensor %587, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%589 = torch.aten.add.Tensor %588, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%590 = torch.aten._softmax %589, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%591 = torch.aten.empty_like %590, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%592 = torch.operator "aten.bernoulli_.float"(%591, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%593 = torch.aten.div_.Scalar %592, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%594 = torch.aten.mul.Tensor %590, %593 : !torch.tensor, !torch.tensor -> !torch.tensor
%595 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%596 = torch.aten.expand %594, %595, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%597 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%598 = torch.aten.view %596, %597 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%599 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%600 = torch.aten.expand %569, %599, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%601 = torch.operator "aten.clone"(%600, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%602 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%603 = torch.operator "aten._unsafe_view"(%601, %602) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%604 = torch.aten.bmm %598, %603 : !torch.tensor, !torch.tensor -> !torch.tensor
%605 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%606 = torch.operator "aten._unsafe_view"(%604, %605) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%607 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%608 = torch.aten.permute %606, %607 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%609 = torch.operator "aten.clone"(%608, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%610 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%611 = torch.aten.view %609, %610 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%612 = torch.aten.t %arg120 : !torch.tensor -> !torch.tensor
%613 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%614 = torch.aten.view %611, %613 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%615 = torch.aten.mm %614, %612 : !torch.tensor, !torch.tensor -> !torch.tensor
%616 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%617 = torch.operator "aten._unsafe_view"(%615, %616) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%618 = torch.aten.add_.Tensor %617, %arg119, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%619 = torch.aten.empty_like %618, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%620 = torch.operator "aten.bernoulli_.float"(%619, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%621 = torch.aten.div_.Scalar %620, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%622 = torch.aten.mul.Tensor %618, %621 : !torch.tensor, !torch.tensor -> !torch.tensor
%623 = torch.aten.add.Tensor %622, %result0_27, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%624 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_30, %result1_31, %result2_32 = torch.aten.native_layer_norm %623, %624, %arg118, %arg117, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%625 = torch.aten.t %arg128 : !torch.tensor -> !torch.tensor
%626 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%627 = torch.aten.view %result0_30, %626 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%628 = torch.aten.mm %627, %625 : !torch.tensor, !torch.tensor -> !torch.tensor
%629 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%630 = torch.operator "aten._unsafe_view"(%628, %629) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%631 = torch.aten.add_.Tensor %630, %arg127, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%632 = torch.aten.gelu %631 : !torch.tensor -> !torch.tensor
%633 = torch.aten.t %arg132 : !torch.tensor -> !torch.tensor
%634 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%635 = torch.aten.view %632, %634 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%636 = torch.aten.mm %635, %633 : !torch.tensor, !torch.tensor -> !torch.tensor
%637 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%638 = torch.operator "aten._unsafe_view"(%636, %637) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%639 = torch.aten.add_.Tensor %638, %arg131, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%640 = torch.aten.empty_like %639, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%641 = torch.operator "aten.bernoulli_.float"(%640, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%642 = torch.aten.div_.Scalar %641, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%643 = torch.aten.mul.Tensor %639, %642 : !torch.tensor, !torch.tensor -> !torch.tensor
%644 = torch.aten.add.Tensor %643, %result0_30, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%645 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_33, %result1_34, %result2_35 = torch.aten.native_layer_norm %644, %645, %arg130, %arg129, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%646 = torch.aten.t %arg140 : !torch.tensor -> !torch.tensor
%647 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%648 = torch.aten.view %result0_33, %647 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%649 = torch.aten.mm %648, %646 : !torch.tensor, !torch.tensor -> !torch.tensor
%650 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%651 = torch.operator "aten._unsafe_view"(%649, %650) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%652 = torch.aten.add_.Tensor %651, %arg139, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%653 = torch.aten.t %arg138 : !torch.tensor -> !torch.tensor
%654 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%655 = torch.aten.view %result0_33, %654 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%656 = torch.aten.mm %655, %653 : !torch.tensor, !torch.tensor -> !torch.tensor
%657 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%658 = torch.operator "aten._unsafe_view"(%656, %657) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%659 = torch.aten.add_.Tensor %658, %arg137, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%660 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%661 = torch.aten.view %659, %660 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%662 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%663 = torch.aten.permute %661, %662 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%664 = torch.aten.t %arg142 : !torch.tensor -> !torch.tensor
%665 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%666 = torch.aten.view %result0_33, %665 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%667 = torch.aten.mm %666, %664 : !torch.tensor, !torch.tensor -> !torch.tensor
%668 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%669 = torch.operator "aten._unsafe_view"(%667, %668) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%670 = torch.aten.add_.Tensor %669, %arg141, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%671 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%672 = torch.aten.view %670, %671 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%673 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%674 = torch.aten.permute %672, %673 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%675 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%676 = torch.aten.view %652, %675 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%677 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%678 = torch.aten.permute %676, %677 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%679 = torch.aten.transpose.int %663, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%680 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%681 = torch.aten.expand %678, %680, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%682 = torch.operator "aten.clone"(%681, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%683 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%684 = torch.operator "aten._unsafe_view"(%682, %683) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%685 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%686 = torch.aten.expand %679, %685, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%687 = torch.operator "aten.clone"(%686, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%688 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%689 = torch.operator "aten._unsafe_view"(%687, %688) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%690 = torch.aten.bmm %684, %689 : !torch.tensor, !torch.tensor -> !torch.tensor
%691 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%692 = torch.operator "aten._unsafe_view"(%690, %691) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%693 = torch.aten.div.Tensor %692, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%694 = torch.aten.add.Tensor %693, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%695 = torch.aten._softmax %694, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%696 = torch.aten.empty_like %695, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%697 = torch.operator "aten.bernoulli_.float"(%696, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%698 = torch.aten.div_.Scalar %697, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%699 = torch.aten.mul.Tensor %695, %698 : !torch.tensor, !torch.tensor -> !torch.tensor
%700 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%701 = torch.aten.expand %699, %700, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%702 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%703 = torch.aten.view %701, %702 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%704 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%705 = torch.aten.expand %674, %704, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%706 = torch.operator "aten.clone"(%705, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%707 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%708 = torch.operator "aten._unsafe_view"(%706, %707) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%709 = torch.aten.bmm %703, %708 : !torch.tensor, !torch.tensor -> !torch.tensor
%710 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%711 = torch.operator "aten._unsafe_view"(%709, %710) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%712 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%713 = torch.aten.permute %711, %712 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%714 = torch.operator "aten.clone"(%713, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%715 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%716 = torch.aten.view %714, %715 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%717 = torch.aten.t %arg136 : !torch.tensor -> !torch.tensor
%718 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%719 = torch.aten.view %716, %718 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%720 = torch.aten.mm %719, %717 : !torch.tensor, !torch.tensor -> !torch.tensor
%721 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%722 = torch.operator "aten._unsafe_view"(%720, %721) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%723 = torch.aten.add_.Tensor %722, %arg135, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%724 = torch.aten.empty_like %723, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%725 = torch.operator "aten.bernoulli_.float"(%724, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%726 = torch.aten.div_.Scalar %725, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%727 = torch.aten.mul.Tensor %723, %726 : !torch.tensor, !torch.tensor -> !torch.tensor
%728 = torch.aten.add.Tensor %727, %result0_33, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%729 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_36, %result1_37, %result2_38 = torch.aten.native_layer_norm %728, %729, %arg134, %arg133, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%730 = torch.aten.t %arg144 : !torch.tensor -> !torch.tensor
%731 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%732 = torch.aten.view %result0_36, %731 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%733 = torch.aten.mm %732, %730 : !torch.tensor, !torch.tensor -> !torch.tensor
%734 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%735 = torch.operator "aten._unsafe_view"(%733, %734) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%736 = torch.aten.add_.Tensor %735, %arg143, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%737 = torch.aten.gelu %736 : !torch.tensor -> !torch.tensor
%738 = torch.aten.t %arg148 : !torch.tensor -> !torch.tensor
%739 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%740 = torch.aten.view %737, %739 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%741 = torch.aten.mm %740, %738 : !torch.tensor, !torch.tensor -> !torch.tensor
%742 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%743 = torch.operator "aten._unsafe_view"(%741, %742) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%744 = torch.aten.add_.Tensor %743, %arg147, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%745 = torch.aten.empty_like %744, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%746 = torch.operator "aten.bernoulli_.float"(%745, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%747 = torch.aten.div_.Scalar %746, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%748 = torch.aten.mul.Tensor %744, %747 : !torch.tensor, !torch.tensor -> !torch.tensor
%749 = torch.aten.add.Tensor %748, %result0_36, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%750 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_39, %result1_40, %result2_41 = torch.aten.native_layer_norm %749, %750, %arg146, %arg145, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%751 = torch.aten.t %arg156 : !torch.tensor -> !torch.tensor
%752 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%753 = torch.aten.view %result0_39, %752 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%754 = torch.aten.mm %753, %751 : !torch.tensor, !torch.tensor -> !torch.tensor
%755 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%756 = torch.operator "aten._unsafe_view"(%754, %755) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%757 = torch.aten.add_.Tensor %756, %arg155, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%758 = torch.aten.t %arg154 : !torch.tensor -> !torch.tensor
%759 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%760 = torch.aten.view %result0_39, %759 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%761 = torch.aten.mm %760, %758 : !torch.tensor, !torch.tensor -> !torch.tensor
%762 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%763 = torch.operator "aten._unsafe_view"(%761, %762) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%764 = torch.aten.add_.Tensor %763, %arg153, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%765 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%766 = torch.aten.view %764, %765 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%767 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%768 = torch.aten.permute %766, %767 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%769 = torch.aten.t %arg158 : !torch.tensor -> !torch.tensor
%770 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%771 = torch.aten.view %result0_39, %770 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%772 = torch.aten.mm %771, %769 : !torch.tensor, !torch.tensor -> !torch.tensor
%773 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%774 = torch.operator "aten._unsafe_view"(%772, %773) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%775 = torch.aten.add_.Tensor %774, %arg157, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%776 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%777 = torch.aten.view %775, %776 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%778 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%779 = torch.aten.permute %777, %778 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%780 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%781 = torch.aten.view %757, %780 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%782 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%783 = torch.aten.permute %781, %782 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%784 = torch.aten.transpose.int %768, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%785 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%786 = torch.aten.expand %783, %785, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%787 = torch.operator "aten.clone"(%786, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%788 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%789 = torch.operator "aten._unsafe_view"(%787, %788) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%790 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%791 = torch.aten.expand %784, %790, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%792 = torch.operator "aten.clone"(%791, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%793 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%794 = torch.operator "aten._unsafe_view"(%792, %793) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%795 = torch.aten.bmm %789, %794 : !torch.tensor, !torch.tensor -> !torch.tensor
%796 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%797 = torch.operator "aten._unsafe_view"(%795, %796) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%798 = torch.aten.div.Tensor %797, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%799 = torch.aten.add.Tensor %798, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%800 = torch.aten._softmax %799, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%801 = torch.aten.empty_like %800, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%802 = torch.operator "aten.bernoulli_.float"(%801, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%803 = torch.aten.div_.Scalar %802, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%804 = torch.aten.mul.Tensor %800, %803 : !torch.tensor, !torch.tensor -> !torch.tensor
%805 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%806 = torch.aten.expand %804, %805, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%807 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%808 = torch.aten.view %806, %807 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%809 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%810 = torch.aten.expand %779, %809, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%811 = torch.operator "aten.clone"(%810, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%812 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%813 = torch.operator "aten._unsafe_view"(%811, %812) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%814 = torch.aten.bmm %808, %813 : !torch.tensor, !torch.tensor -> !torch.tensor
%815 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%816 = torch.operator "aten._unsafe_view"(%814, %815) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%817 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%818 = torch.aten.permute %816, %817 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%819 = torch.operator "aten.clone"(%818, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%820 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%821 = torch.aten.view %819, %820 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%822 = torch.aten.t %arg152 : !torch.tensor -> !torch.tensor
%823 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%824 = torch.aten.view %821, %823 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%825 = torch.aten.mm %824, %822 : !torch.tensor, !torch.tensor -> !torch.tensor
%826 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%827 = torch.operator "aten._unsafe_view"(%825, %826) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%828 = torch.aten.add_.Tensor %827, %arg151, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%829 = torch.aten.empty_like %828, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%830 = torch.operator "aten.bernoulli_.float"(%829, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%831 = torch.aten.div_.Scalar %830, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%832 = torch.aten.mul.Tensor %828, %831 : !torch.tensor, !torch.tensor -> !torch.tensor
%833 = torch.aten.add.Tensor %832, %result0_39, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%834 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_42, %result1_43, %result2_44 = torch.aten.native_layer_norm %833, %834, %arg150, %arg149, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%835 = torch.aten.t %arg160 : !torch.tensor -> !torch.tensor
%836 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%837 = torch.aten.view %result0_42, %836 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%838 = torch.aten.mm %837, %835 : !torch.tensor, !torch.tensor -> !torch.tensor
%839 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%840 = torch.operator "aten._unsafe_view"(%838, %839) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%841 = torch.aten.add_.Tensor %840, %arg159, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%842 = torch.aten.gelu %841 : !torch.tensor -> !torch.tensor
%843 = torch.aten.t %arg164 : !torch.tensor -> !torch.tensor
%844 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%845 = torch.aten.view %842, %844 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%846 = torch.aten.mm %845, %843 : !torch.tensor, !torch.tensor -> !torch.tensor
%847 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%848 = torch.operator "aten._unsafe_view"(%846, %847) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%849 = torch.aten.add_.Tensor %848, %arg163, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%850 = torch.aten.empty_like %849, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%851 = torch.operator "aten.bernoulli_.float"(%850, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%852 = torch.aten.div_.Scalar %851, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%853 = torch.aten.mul.Tensor %849, %852 : !torch.tensor, !torch.tensor -> !torch.tensor
%854 = torch.aten.add.Tensor %853, %result0_42, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%855 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_45, %result1_46, %result2_47 = torch.aten.native_layer_norm %854, %855, %arg162, %arg161, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%856 = torch.aten.t %arg172 : !torch.tensor -> !torch.tensor
%857 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%858 = torch.aten.view %result0_45, %857 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%859 = torch.aten.mm %858, %856 : !torch.tensor, !torch.tensor -> !torch.tensor
%860 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%861 = torch.operator "aten._unsafe_view"(%859, %860) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%862 = torch.aten.add_.Tensor %861, %arg171, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%863 = torch.aten.t %arg170 : !torch.tensor -> !torch.tensor
%864 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%865 = torch.aten.view %result0_45, %864 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%866 = torch.aten.mm %865, %863 : !torch.tensor, !torch.tensor -> !torch.tensor
%867 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%868 = torch.operator "aten._unsafe_view"(%866, %867) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%869 = torch.aten.add_.Tensor %868, %arg169, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%870 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%871 = torch.aten.view %869, %870 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%872 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%873 = torch.aten.permute %871, %872 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%874 = torch.aten.t %arg174 : !torch.tensor -> !torch.tensor
%875 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%876 = torch.aten.view %result0_45, %875 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%877 = torch.aten.mm %876, %874 : !torch.tensor, !torch.tensor -> !torch.tensor
%878 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%879 = torch.operator "aten._unsafe_view"(%877, %878) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%880 = torch.aten.add_.Tensor %879, %arg173, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%881 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%882 = torch.aten.view %880, %881 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%883 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%884 = torch.aten.permute %882, %883 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%885 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%886 = torch.aten.view %862, %885 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%887 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%888 = torch.aten.permute %886, %887 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%889 = torch.aten.transpose.int %873, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%890 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%891 = torch.aten.expand %888, %890, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%892 = torch.operator "aten.clone"(%891, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%893 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%894 = torch.operator "aten._unsafe_view"(%892, %893) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%895 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%896 = torch.aten.expand %889, %895, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%897 = torch.operator "aten.clone"(%896, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%898 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%899 = torch.operator "aten._unsafe_view"(%897, %898) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%900 = torch.aten.bmm %894, %899 : !torch.tensor, !torch.tensor -> !torch.tensor
%901 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%902 = torch.operator "aten._unsafe_view"(%900, %901) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%903 = torch.aten.div.Tensor %902, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%904 = torch.aten.add.Tensor %903, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%905 = torch.aten._softmax %904, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%906 = torch.aten.empty_like %905, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%907 = torch.operator "aten.bernoulli_.float"(%906, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%908 = torch.aten.div_.Scalar %907, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%909 = torch.aten.mul.Tensor %905, %908 : !torch.tensor, !torch.tensor -> !torch.tensor
%910 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%911 = torch.aten.expand %909, %910, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%912 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%913 = torch.aten.view %911, %912 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%914 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%915 = torch.aten.expand %884, %914, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%916 = torch.operator "aten.clone"(%915, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%917 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%918 = torch.operator "aten._unsafe_view"(%916, %917) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%919 = torch.aten.bmm %913, %918 : !torch.tensor, !torch.tensor -> !torch.tensor
%920 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%921 = torch.operator "aten._unsafe_view"(%919, %920) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%922 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%923 = torch.aten.permute %921, %922 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%924 = torch.operator "aten.clone"(%923, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%925 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%926 = torch.aten.view %924, %925 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%927 = torch.aten.t %arg168 : !torch.tensor -> !torch.tensor
%928 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%929 = torch.aten.view %926, %928 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%930 = torch.aten.mm %929, %927 : !torch.tensor, !torch.tensor -> !torch.tensor
%931 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%932 = torch.operator "aten._unsafe_view"(%930, %931) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%933 = torch.aten.add_.Tensor %932, %arg167, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%934 = torch.aten.empty_like %933, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%935 = torch.operator "aten.bernoulli_.float"(%934, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%936 = torch.aten.div_.Scalar %935, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%937 = torch.aten.mul.Tensor %933, %936 : !torch.tensor, !torch.tensor -> !torch.tensor
%938 = torch.aten.add.Tensor %937, %result0_45, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%939 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %938, %939, %arg166, %arg165, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%940 = torch.aten.t %arg176 : !torch.tensor -> !torch.tensor
%941 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%942 = torch.aten.view %result0_48, %941 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%943 = torch.aten.mm %942, %940 : !torch.tensor, !torch.tensor -> !torch.tensor
%944 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%945 = torch.operator "aten._unsafe_view"(%943, %944) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%946 = torch.aten.add_.Tensor %945, %arg175, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%947 = torch.aten.gelu %946 : !torch.tensor -> !torch.tensor
%948 = torch.aten.t %arg180 : !torch.tensor -> !torch.tensor
%949 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%950 = torch.aten.view %947, %949 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%951 = torch.aten.mm %950, %948 : !torch.tensor, !torch.tensor -> !torch.tensor
%952 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%953 = torch.operator "aten._unsafe_view"(%951, %952) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%954 = torch.aten.add_.Tensor %953, %arg179, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%955 = torch.aten.empty_like %954, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%956 = torch.operator "aten.bernoulli_.float"(%955, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%957 = torch.aten.div_.Scalar %956, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%958 = torch.aten.mul.Tensor %954, %957 : !torch.tensor, !torch.tensor -> !torch.tensor
%959 = torch.aten.add.Tensor %958, %result0_48, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%960 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %959, %960, %arg178, %arg177, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%961 = torch.aten.t %arg188 : !torch.tensor -> !torch.tensor
%962 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%963 = torch.aten.view %result0_51, %962 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%964 = torch.aten.mm %963, %961 : !torch.tensor, !torch.tensor -> !torch.tensor
%965 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%966 = torch.operator "aten._unsafe_view"(%964, %965) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%967 = torch.aten.add_.Tensor %966, %arg187, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%968 = torch.aten.t %arg186 : !torch.tensor -> !torch.tensor
%969 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%970 = torch.aten.view %result0_51, %969 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%971 = torch.aten.mm %970, %968 : !torch.tensor, !torch.tensor -> !torch.tensor
%972 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%973 = torch.operator "aten._unsafe_view"(%971, %972) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%974 = torch.aten.add_.Tensor %973, %arg185, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%975 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%976 = torch.aten.view %974, %975 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%977 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%978 = torch.aten.permute %976, %977 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%979 = torch.aten.t %arg190 : !torch.tensor -> !torch.tensor
%980 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%981 = torch.aten.view %result0_51, %980 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%982 = torch.aten.mm %981, %979 : !torch.tensor, !torch.tensor -> !torch.tensor
%983 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%984 = torch.operator "aten._unsafe_view"(%982, %983) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%985 = torch.aten.add_.Tensor %984, %arg189, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%986 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%987 = torch.aten.view %985, %986 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%988 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%989 = torch.aten.permute %987, %988 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// View %967 (defined above this chunk) as (%int4, %int512, %int12, %int64) and
// permute dims to (0, 2, 1, 3); this becomes the left-hand bmm operand.
// NOTE(review): presumably the attention "query" — confirm against the lines above this chunk.
%990 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%991 = torch.aten.view %967, %990 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%992 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%993 = torch.aten.permute %991, %992 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dims of %978 to form the right-hand bmm operand.
%994 = torch.aten.transpose.int %978, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// expand -> clone -> _unsafe_view: turn the 4-D left operand into a 3-D (%int48, %int512, %int64) tensor for bmm.
%995 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%996 = torch.aten.expand %993, %995, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%997 = torch.operator "aten.clone"(%996, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%998 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%999 = torch.operator "aten._unsafe_view"(%997, %998) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Same flattening for the transposed operand, giving (%int48, %int64, %int512).
%1000 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1001 = torch.aten.expand %994, %1000, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1002 = torch.operator "aten.clone"(%1001, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1003 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1004 = torch.operator "aten._unsafe_view"(%1002, %1003) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, then view the result as (%int4, %int12, %int512, %int512).
%1005 = torch.aten.bmm %999, %1004 : !torch.tensor, !torch.tensor -> !torch.tensor
%1006 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1007 = torch.operator "aten._unsafe_view"(%1005, %1006) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by the 0-d f64 tensor %1 and add the [4,1,1,512] f32 tensor %0 (both defined outside this chunk).
// NOTE(review): presumably the sqrt(head_dim) scaling and the additive attention mask — confirm.
%1008 = torch.aten.div.Tensor %1007, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1009 = torch.aten.add.Tensor %1008, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
// Softmax along dim %int-1; the last operand (half_to_float in the ATen schema) is %false.
%1010 = torch.aten._softmax %1009, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask: empty_like -> in-place bernoulli_ with %float9.000000e-01 -> in-place div_ by the
// same constant, then multiplied element-wise into %1010.
// NOTE(review): looks like dropout with keep probability 0.9 (value inferred from the constant's name) — confirm.
%1011 = torch.aten.empty_like %1010, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1012 = torch.operator "aten.bernoulli_.float"(%1011, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1013 = torch.aten.div_.Scalar %1012, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1014 = torch.aten.mul.Tensor %1010, %1013 : !torch.tensor, !torch.tensor -> !torch.tensor
// Flatten %1014 to (%int48, %int512, %int512) via expand + view (left bmm operand).
%1015 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1016 = torch.aten.expand %1014, %1015, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1017 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1018 = torch.aten.view %1016, %1017 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Flatten %989 to (%int48, %int512, %int64) via expand + clone + _unsafe_view (right bmm operand).
%1019 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1020 = torch.aten.expand %989, %1019, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1021 = torch.operator "aten.clone"(%1020, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1022 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1023 = torch.operator "aten._unsafe_view"(%1021, %1022) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, view as (%int4, %int12, %int512, %int64), permute dims to (0, 2, 1, 3),
// clone, and view as (%int4, %int512, %int768).
// NOTE(review): presumably merges the per-head outputs back into the hidden dimension — confirm.
%1024 = torch.aten.bmm %1018, %1023 : !torch.tensor, !torch.tensor -> !torch.tensor
%1025 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1026 = torch.operator "aten._unsafe_view"(%1024, %1025) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1027 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1028 = torch.aten.permute %1026, %1027 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1029 = torch.operator "aten.clone"(%1028, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1030 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1031 = torch.aten.view %1029, %1030 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Linear-style projection of %1031: transpose %arg184, matmul on a (%int2048, %int768) view,
// view the product as (%int4, %int512, %int768), then add %arg183 in place.
// NOTE(review): %arg184/%arg183 presumably the attention output weight/bias — confirm.
%1032 = torch.aten.t %arg184 : !torch.tensor -> !torch.tensor
%1033 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1034 = torch.aten.view %1031, %1033 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1035 = torch.aten.mm %1034, %1032 : !torch.tensor, !torch.tensor -> !torch.tensor
%1036 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1037 = torch.operator "aten._unsafe_view"(%1035, %1036) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1038 = torch.aten.add_.Tensor %1037, %arg183, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask: empty_like -> bernoulli_(%float9.000000e-01) -> div_ by the same constant, multiplied into %1038.
// NOTE(review): looks like dropout with keep probability 0.9 — confirm.
%1039 = torch.aten.empty_like %1038, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1040 = torch.operator "aten.bernoulli_.float"(%1039, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1041 = torch.aten.div_.Scalar %1040, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1042 = torch.aten.mul.Tensor %1038, %1041 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add %result0_51 (the same tensor that was viewed into %981) and run native_layer_norm over
// normalized_shape (%int768) with %arg182 / %arg181 in the weight / bias positions and eps %float9.999990e-13.
%1043 = torch.aten.add.Tensor %1042, %result0_51, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1044 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_54, %result1_55, %result2_56 = torch.aten.native_layer_norm %1043, %1044, %arg182, %arg181, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// First projection of %result0_54: transpose %arg192, matmul on a (%int2048, %int768) view,
// view the product as (%int4, %int512, %int3072), then add %arg191 in place.
// NOTE(review): presumably the feed-forward "intermediate" dense layer — confirm.
%1045 = torch.aten.t %arg192 : !torch.tensor -> !torch.tensor
%1046 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1047 = torch.aten.view %result0_54, %1046 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1048 = torch.aten.mm %1047, %1045 : !torch.tensor, !torch.tensor -> !torch.tensor
%1049 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1050 = torch.operator "aten._unsafe_view"(%1048, %1049) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1051 = torch.aten.add_.Tensor %1050, %arg191, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// GELU activation.
%1052 = torch.aten.gelu %1051 : !torch.tensor -> !torch.tensor
// Second projection: transpose %arg196, matmul on a (%int2048, %int3072) view,
// view the product as (%int4, %int512, %int768), then add %arg195 in place.
%1053 = torch.aten.t %arg196 : !torch.tensor -> !torch.tensor
%1054 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1055 = torch.aten.view %1052, %1054 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1056 = torch.aten.mm %1055, %1053 : !torch.tensor, !torch.tensor -> !torch.tensor
%1057 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1058 = torch.operator "aten._unsafe_view"(%1056, %1057) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1059 = torch.aten.add_.Tensor %1058, %arg195, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask: empty_like -> bernoulli_(%float9.000000e-01) -> div_ by the same constant, multiplied into %1059.
// NOTE(review): looks like dropout with keep probability 0.9 — confirm.
%1060 = torch.aten.empty_like %1059, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1061 = torch.operator "aten.bernoulli_.float"(%1060, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1062 = torch.aten.div_.Scalar %1061, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1063 = torch.aten.mul.Tensor %1059, %1062 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add %result0_54 (the input of this stage) and run native_layer_norm over normalized_shape (%int768)
// with %arg194 / %arg193 in the weight / bias positions and eps %float9.999990e-13.
%1064 = torch.aten.add.Tensor %1063, %result0_54, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1065 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_57, %result1_58, %result2_59 = torch.aten.native_layer_norm %1064, %1065, %arg194, %arg193, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three linear-style projections of the same input %result0_57. Each one transposes its matrix,
// matmuls on a (%int2048, %int768) view, views the product as (%int4, %int512, %int768), and adds a vector in place.
// NOTE(review): presumably the query / key / value projections of a self-attention layer — confirm.
// Projection 1 (%arg44 / %arg43) -> %1072; reshaped per head further down into %1098, the left operand of the first bmm.
%1066 = torch.aten.t %arg44 : !torch.tensor -> !torch.tensor
%1067 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1068 = torch.aten.view %result0_57, %1067 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1069 = torch.aten.mm %1068, %1066 : !torch.tensor, !torch.tensor -> !torch.tensor
%1070 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1071 = torch.operator "aten._unsafe_view"(%1069, %1070) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1072 = torch.aten.add_.Tensor %1071, %arg43, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection 2 (%arg42 / %arg41) -> %1079, then viewed as (%int4, %int512, %int12, %int64) and permuted to (0, 2, 1, 3).
// %1083 is later transposed on its last two dims before the first bmm.
%1073 = torch.aten.t %arg42 : !torch.tensor -> !torch.tensor
%1074 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1075 = torch.aten.view %result0_57, %1074 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1076 = torch.aten.mm %1075, %1073 : !torch.tensor, !torch.tensor -> !torch.tensor
%1077 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1078 = torch.operator "aten._unsafe_view"(%1076, %1077) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1079 = torch.aten.add_.Tensor %1078, %arg41, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1080 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1081 = torch.aten.view %1079, %1080 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1082 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1083 = torch.aten.permute %1081, %1082 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection 3 (%arg46 / %arg45) -> %1090, same per-head view/permute -> %1094 (right operand of the second bmm).
%1084 = torch.aten.t %arg46 : !torch.tensor -> !torch.tensor
%1085 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1086 = torch.aten.view %result0_57, %1085 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1087 = torch.aten.mm %1086, %1084 : !torch.tensor, !torch.tensor -> !torch.tensor
%1088 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1089 = torch.operator "aten._unsafe_view"(%1087, %1088) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1090 = torch.aten.add_.Tensor %1089, %arg45, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1091 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1092 = torch.aten.view %1090, %1091 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1093 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1094 = torch.aten.permute %1092, %1093 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Per-head view/permute of projection 1 (%1072) -> %1098.
%1095 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1096 = torch.aten.view %1072, %1095 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1097 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1098 = torch.aten.permute %1096, %1097 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dims of %1083 to form the right-hand operand of the bmm below.
%1099 = torch.aten.transpose.int %1083, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// expand -> clone -> _unsafe_view: flatten %1098 to a 3-D (%int48, %int512, %int64) bmm operand.
%1100 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1101 = torch.aten.expand %1098, %1100, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1102 = torch.operator "aten.clone"(%1101, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1103 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1104 = torch.operator "aten._unsafe_view"(%1102, %1103) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Same flattening for the transposed operand, giving (%int48, %int64, %int512).
%1105 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1106 = torch.aten.expand %1099, %1105, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1107 = torch.operator "aten.clone"(%1106, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1108 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1109 = torch.operator "aten._unsafe_view"(%1107, %1108) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, then view the result as (%int4, %int12, %int512, %int512).
%1110 = torch.aten.bmm %1104, %1109 : !torch.tensor, !torch.tensor -> !torch.tensor
%1111 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1112 = torch.operator "aten._unsafe_view"(%1110, %1111) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by the 0-d f64 tensor %1 and add the [4,1,1,512] f32 tensor %0 (both defined outside this chunk).
// NOTE(review): presumably the sqrt(head_dim) scaling and the additive attention mask — confirm.
%1113 = torch.aten.div.Tensor %1112, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1114 = torch.aten.add.Tensor %1113, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
// Softmax along dim %int-1; the last operand (half_to_float in the ATen schema) is %false.
%1115 = torch.aten._softmax %1114, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask: empty_like -> bernoulli_(%float9.000000e-01) -> div_ by the same constant, multiplied into %1115.
// NOTE(review): looks like dropout with keep probability 0.9 — confirm.
%1116 = torch.aten.empty_like %1115, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1117 = torch.operator "aten.bernoulli_.float"(%1116, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1118 = torch.aten.div_.Scalar %1117, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1119 = torch.aten.mul.Tensor %1115, %1118 : !torch.tensor, !torch.tensor -> !torch.tensor
// Flatten %1119 to (%int48, %int512, %int512) via expand + view (left bmm operand).
%1120 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1121 = torch.aten.expand %1119, %1120, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1122 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1123 = torch.aten.view %1121, %1122 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Flatten %1094 to (%int48, %int512, %int64) via expand + clone + _unsafe_view (right bmm operand).
%1124 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1125 = torch.aten.expand %1094, %1124, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1126 = torch.operator "aten.clone"(%1125, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1127 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1128 = torch.operator "aten._unsafe_view"(%1126, %1127) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, view as (%int4, %int12, %int512, %int64), permute dims to (0, 2, 1, 3),
// clone, and view as (%int4, %int512, %int768).
// NOTE(review): presumably merges the per-head outputs back into the hidden dimension — confirm.
%1129 = torch.aten.bmm %1123, %1128 : !torch.tensor, !torch.tensor -> !torch.tensor
%1130 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1131 = torch.operator "aten._unsafe_view"(%1129, %1130) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1132 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1133 = torch.aten.permute %1131, %1132 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1134 = torch.operator "aten.clone"(%1133, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1135 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1136 = torch.aten.view %1134, %1135 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Linear-style projection of %1136: transpose %arg40, matmul on a (%int2048, %int768) view,
// view the product as (%int4, %int512, %int768), then add %arg39 in place.
// NOTE(review): %arg40/%arg39 presumably the attention output weight/bias — confirm.
%1137 = torch.aten.t %arg40 : !torch.tensor -> !torch.tensor
%1138 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1139 = torch.aten.view %1136, %1138 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1140 = torch.aten.mm %1139, %1137 : !torch.tensor, !torch.tensor -> !torch.tensor
%1141 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1142 = torch.operator "aten._unsafe_view"(%1140, %1141) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1143 = torch.aten.add_.Tensor %1142, %arg39, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask: empty_like -> bernoulli_(%float9.000000e-01) -> div_ by the same constant, multiplied into %1143.
// NOTE(review): looks like dropout with keep probability 0.9 — confirm.
%1144 = torch.aten.empty_like %1143, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1145 = torch.operator "aten.bernoulli_.float"(%1144, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1146 = torch.aten.div_.Scalar %1145, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1147 = torch.aten.mul.Tensor %1143, %1146 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add %result0_57 (the input of the three projections above) and run native_layer_norm over
// normalized_shape (%int768) with %arg38 / %arg37 in the weight / bias positions and eps %float9.999990e-13.
%1148 = torch.aten.add.Tensor %1147, %result0_57, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1149 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_60, %result1_61, %result2_62 = torch.aten.native_layer_norm %1148, %1149, %arg38, %arg37, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// First projection of %result0_60: transpose %arg48, matmul on a (%int2048, %int768) view,
// view the product as (%int4, %int512, %int3072), then add %arg47 in place.
// NOTE(review): presumably the feed-forward "intermediate" dense layer — confirm.
%1150 = torch.aten.t %arg48 : !torch.tensor -> !torch.tensor
%1151 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1152 = torch.aten.view %result0_60, %1151 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1153 = torch.aten.mm %1152, %1150 : !torch.tensor, !torch.tensor -> !torch.tensor
%1154 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1155 = torch.operator "aten._unsafe_view"(%1153, %1154) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1156 = torch.aten.add_.Tensor %1155, %arg47, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// GELU activation.
%1157 = torch.aten.gelu %1156 : !torch.tensor -> !torch.tensor
// Second projection: transpose %arg52, matmul on a (%int2048, %int3072) view,
// view the product as (%int4, %int512, %int768), then add %arg51 in place.
%1158 = torch.aten.t %arg52 : !torch.tensor -> !torch.tensor
%1159 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1160 = torch.aten.view %1157, %1159 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1161 = torch.aten.mm %1160, %1158 : !torch.tensor, !torch.tensor -> !torch.tensor
%1162 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1163 = torch.operator "aten._unsafe_view"(%1161, %1162) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1164 = torch.aten.add_.Tensor %1163, %arg51, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask: empty_like -> bernoulli_(%float9.000000e-01) -> div_ by the same constant, multiplied into %1164.
// NOTE(review): looks like dropout with keep probability 0.9 — confirm.
%1165 = torch.aten.empty_like %1164, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1166 = torch.operator "aten.bernoulli_.float"(%1165, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1167 = torch.aten.div_.Scalar %1166, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1168 = torch.aten.mul.Tensor %1164, %1167 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add %result0_60 (the input of this stage) and run native_layer_norm over normalized_shape (%int768)
// with %arg50 / %arg49 in the weight / bias positions and eps %float9.999990e-13.
%1169 = torch.aten.add.Tensor %1168, %result0_60, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1170 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_63, %result1_64, %result2_65 = torch.aten.native_layer_norm %1169, %1170, %arg50, %arg49, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three linear-style projections of the same input %result0_63. Each one transposes its matrix,
// matmuls on a (%int2048, %int768) view, views the product as (%int4, %int512, %int768), and adds a vector in place.
// NOTE(review): presumably the query / key / value projections of a self-attention layer — confirm.
// Projection 1 (%arg60 / %arg59) -> %1177; reshaped per head further down into %1203, the left operand of the first bmm.
%1171 = torch.aten.t %arg60 : !torch.tensor -> !torch.tensor
%1172 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1173 = torch.aten.view %result0_63, %1172 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1174 = torch.aten.mm %1173, %1171 : !torch.tensor, !torch.tensor -> !torch.tensor
%1175 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1176 = torch.operator "aten._unsafe_view"(%1174, %1175) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1177 = torch.aten.add_.Tensor %1176, %arg59, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection 2 (%arg58 / %arg57) -> %1184, then viewed as (%int4, %int512, %int12, %int64) and permuted to (0, 2, 1, 3).
// %1188 is later transposed on its last two dims before the first bmm.
%1178 = torch.aten.t %arg58 : !torch.tensor -> !torch.tensor
%1179 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1180 = torch.aten.view %result0_63, %1179 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1181 = torch.aten.mm %1180, %1178 : !torch.tensor, !torch.tensor -> !torch.tensor
%1182 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1183 = torch.operator "aten._unsafe_view"(%1181, %1182) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1184 = torch.aten.add_.Tensor %1183, %arg57, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1185 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1186 = torch.aten.view %1184, %1185 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1187 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1188 = torch.aten.permute %1186, %1187 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection 3 (%arg62 / %arg61) -> %1195, same per-head view/permute -> %1199 (right operand of the second bmm).
%1189 = torch.aten.t %arg62 : !torch.tensor -> !torch.tensor
%1190 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1191 = torch.aten.view %result0_63, %1190 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1192 = torch.aten.mm %1191, %1189 : !torch.tensor, !torch.tensor -> !torch.tensor
%1193 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1194 = torch.operator "aten._unsafe_view"(%1192, %1193) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1195 = torch.aten.add_.Tensor %1194, %arg61, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1196 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1197 = torch.aten.view %1195, %1196 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1198 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1199 = torch.aten.permute %1197, %1198 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Per-head view/permute of projection 1 (%1177) -> %1203.
%1200 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1201 = torch.aten.view %1177, %1200 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1202 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1203 = torch.aten.permute %1201, %1202 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dims of %1188 to form the right-hand operand of the bmm below.
%1204 = torch.aten.transpose.int %1188, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// expand -> clone -> _unsafe_view: flatten %1203 to a 3-D (%int48, %int512, %int64) bmm operand.
%1205 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1206 = torch.aten.expand %1203, %1205, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1207 = torch.operator "aten.clone"(%1206, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1208 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1209 = torch.operator "aten._unsafe_view"(%1207, %1208) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Same flattening for the transposed operand, giving (%int48, %int64, %int512).
%1210 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1211 = torch.aten.expand %1204, %1210, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1212 = torch.operator "aten.clone"(%1211, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1213 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1214 = torch.operator "aten._unsafe_view"(%1212, %1213) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, then view the result as (%int4, %int12, %int512, %int512).
%1215 = torch.aten.bmm %1209, %1214 : !torch.tensor, !torch.tensor -> !torch.tensor
%1216 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1217 = torch.operator "aten._unsafe_view"(%1215, %1216) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by the 0-d f64 tensor %1 and add the [4,1,1,512] f32 tensor %0 (both defined outside this chunk).
// NOTE(review): presumably the sqrt(head_dim) scaling and the additive attention mask — confirm.
%1218 = torch.aten.div.Tensor %1217, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1219 = torch.aten.add.Tensor %1218, %0, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
// Softmax along dim %int-1; the last operand (half_to_float in the ATen schema) is %false.
%1220 = torch.aten._softmax %1219, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask: empty_like -> bernoulli_(%float9.000000e-01) -> div_ by the same constant, multiplied into %1220.
// NOTE(review): looks like dropout with keep probability 0.9 — confirm.
%1221 = torch.aten.empty_like %1220, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1222 = torch.operator "aten.bernoulli_.float"(%1221, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1223 = torch.aten.div_.Scalar %1222, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1224 = torch.aten.mul.Tensor %1220, %1223 : !torch.tensor, !torch.tensor -> !torch.tensor
// Flatten %1224 to (%int48, %int512, %int512) via expand + view (left bmm operand).
%1225 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1226 = torch.aten.expand %1224, %1225, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1227 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1228 = torch.aten.view %1226, %1227 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Flatten %1199 to (%int48, %int512, %int64) via expand + clone + _unsafe_view (right bmm operand).
%1229 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1230 = torch.aten.expand %1199, %1229, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1231 = torch.operator "aten.clone"(%1230, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1232 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1233 = torch.operator "aten._unsafe_view"(%1231, %1232) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, view as (%int4, %int12, %int512, %int64), permute dims to (0, 2, 1, 3),
// clone, and view as (%int4, %int512, %int768).
// NOTE(review): presumably merges the per-head outputs back into the hidden dimension — confirm.
%1234 = torch.aten.bmm %1228, %1233 : !torch.tensor, !torch.tensor -> !torch.tensor
%1235 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1236 = torch.operator "aten._unsafe_view"(%1234, %1235) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1237 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1238 = torch.aten.permute %1236, %1237 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1239 = torch.operator "aten.clone"(%1238, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1240 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1241 = torch.aten.view %1239, %1240 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Linear-style projection of %1241: transpose %arg56, matmul on a (%int2048, %int768) view,
// view the product as (%int4, %int512, %int768), then add %arg55 in place.
// NOTE(review): %arg56/%arg55 presumably the attention output weight/bias — confirm.
%1242 = torch.aten.t %arg56 : !torch.tensor -> !torch.tensor
%1243 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1244 = torch.aten.view %1241, %1243 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1245 = torch.aten.mm %1244, %1242 : !torch.tensor, !torch.tensor -> !torch.tensor
%1246 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1247 = torch.operator "aten._unsafe_view"(%1245, %1246) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1248 = torch.aten.add_.Tensor %1247, %arg55, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask: empty_like -> bernoulli_(%float9.000000e-01) -> div_ by the same constant, multiplied into %1248.
// NOTE(review): looks like dropout with keep probability 0.9 — confirm.
%1249 = torch.aten.empty_like %1248, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1250 = torch.operator "aten.bernoulli_.float"(%1249, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1251 = torch.aten.div_.Scalar %1250, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1252 = torch.aten.mul.Tensor %1248, %1251 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add %result0_63 (the input of the three projections above) and run native_layer_norm over
// normalized_shape (%int768) with %arg54 / %arg53 in the weight / bias positions and eps %float9.999990e-13.
%1253 = torch.aten.add.Tensor %1252, %result0_63, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1254 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_66, %result1_67, %result2_68 = torch.aten.native_layer_norm %1253, %1254, %arg54, %arg53, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// First projection of %result0_66: transpose %arg64, matmul on a (%int2048, %int768) view,
// view the product as (%int4, %int512, %int3072), then add %arg63 in place.
// NOTE(review): presumably the feed-forward "intermediate" dense layer — confirm.
%1255 = torch.aten.t %arg64 : !torch.tensor -> !torch.tensor
%1256 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1257 = torch.aten.view %result0_66, %1256 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1258 = torch.aten.mm %1257, %1255 : !torch.tensor, !torch.tensor -> !torch.tensor
%1259 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1260 = torch.operator "aten._unsafe_view"(%1258, %1259) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1261 = torch.aten.add_.Tensor %1260, %arg63, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// GELU activation.
%1262 = torch.aten.gelu %1261 : !torch.tensor -> !torch.tensor
// Second projection: transpose %arg68, matmul on a (%int2048, %int3072) view,
// view the product as (%int4, %int512, %int768), then add %arg67 in place.
%1263 = torch.aten.t %arg68 : !torch.tensor -> !torch.tensor
%1264 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1265 = torch.aten.view %1262, %1264 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1266 = torch.aten.mm %1265, %1263 : !torch.tensor, !torch.tensor -> !torch.tensor
%1267 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1268 = torch.operator "aten._unsafe_view"(%1266, %1267) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1269 = torch.aten.add_.Tensor %1268, %arg67, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask: empty_like -> bernoulli_(%float9.000000e-01) -> div_ by the same constant, multiplied into %1269.
// NOTE(review): looks like dropout with keep probability 0.9 — confirm.
%1270 = torch.aten.empty_like %1269, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1271 = torch.operator "aten.bernoulli_.float"(%1270, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1272 = torch.aten.div_.Scalar %1271, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1273 = torch.aten.mul.Tensor %1269, %1272 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add %result0_66 (the input of this stage) and run native_layer_norm over normalized_shape (%int768)
// with %arg66 / %arg65 in the weight / bias positions and eps %float9.999990e-13.
%1274 = torch.aten.add.Tensor %1273, %result0_66, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1275 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_69, %result1_70, %result2_71 = torch.aten.native_layer_norm %1274, %1275, %arg66, %arg65, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1276 = torch.aten.t %arg203 : !torch.tensor -> !torch.tensor
%1277 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1278 = torch.aten.view %result0_69, %1277 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1279 = torch.aten.mm %1278, %1276 : !torch.tensor, !torch.tensor -> !torch.tensor
%1280 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1281 = torch.operator "aten._unsafe_view"(%1279, %1280) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1282 = torch.aten.add_.Tensor %1281, %arg202, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1283 = torch.aten.gelu %1282 : !torch.tensor -> !torch.tensor
%1284 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_72, %result1_73, %result2_74 = torch.aten.native_layer_norm %1283, %1284, %arg201, %arg200, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1285 = torch.aten.t %arg199 : !torch.tensor -> !torch.tensor
%1286 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1287 = torch.aten.view %result0_72, %1286 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1288 = torch.aten.mm %1287, %1285 : !torch.tensor, !torch.tensor -> !torch.tensor
%1289 = torch.prim.ListConstruct %int4, %int512, %int30522 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1290 = torch.operator "aten._unsafe_view"(%1288, %1289) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1291 = torch.aten.add_.Tensor %1290, %arg198, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1292 = torch.prim.ListConstruct %int-1, %int30522 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1293 = torch.aten.view %1291, %1292 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1294 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<!torch.int>
%1295 = torch.aten.view %arg207, %1294 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1296 = torch.operator "aten._log_softmax"(%1293, %int1, %false) : (!torch.tensor, !torch.int, !torch.bool) -> !torch.tensor
%output, %total_weight = torch.aten.nll_loss_forward %1296, %1295, %none, %int1, %int-100 : !torch.tensor, !torch.tensor, !torch.none, !torch.int, !torch.int -> !torch.tensor, !torch.tensor
%1297 = torch.aten.transpose.int %1228, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1298 = torch.aten.transpose.int %1233, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1299 = torch.aten.transpose.int %1209, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1300 = torch.aten.transpose.int %1214, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1301 = torch.aten.transpose.int %1123, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1302 = torch.aten.transpose.int %1128, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1303 = torch.aten.transpose.int %1104, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1304 = torch.aten.transpose.int %1109, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1305 = torch.aten.transpose.int %1018, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1306 = torch.aten.transpose.int %1023, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1307 = torch.aten.transpose.int %999, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1308 = torch.aten.transpose.int %1004, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1309 = torch.aten.transpose.int %913, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1310 = torch.aten.transpose.int %918, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1311 = torch.aten.transpose.int %894, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1312 = torch.aten.transpose.int %899, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1313 = torch.aten.transpose.int %808, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1314 = torch.aten.transpose.int %813, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1315 = torch.aten.transpose.int %789, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1316 = torch.aten.transpose.int %794, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1317 = torch.aten.transpose.int %703, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1318 = torch.aten.transpose.int %708, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1319 = torch.aten.transpose.int %684, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1320 = torch.aten.transpose.int %689, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1321 = torch.aten.transpose.int %598, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1322 = torch.aten.transpose.int %603, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1323 = torch.aten.transpose.int %579, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1324 = torch.aten.transpose.int %584, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1325 = torch.aten.transpose.int %493, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1326 = torch.aten.transpose.int %498, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1327 = torch.aten.transpose.int %474, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1328 = torch.aten.transpose.int %479, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1329 = torch.aten.transpose.int %388, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1330 = torch.aten.transpose.int %393, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1331 = torch.aten.transpose.int %369, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1332 = torch.aten.transpose.int %374, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1333 = torch.aten.transpose.int %283, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1334 = torch.aten.transpose.int %288, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1335 = torch.aten.transpose.int %264, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1336 = torch.aten.transpose.int %269, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1337 = torch.aten.transpose.int %178, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1338 = torch.aten.transpose.int %183, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1339 = torch.aten.transpose.int %159, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1340 = torch.aten.transpose.int %164, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1341 = torch.aten.transpose.int %73, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1342 = torch.aten.transpose.int %78, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1343 = torch.aten.transpose.int %54, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1344 = torch.aten.transpose.int %59, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1345 = torch.prim.ListConstruct %output, %1291, %306, %result2_20, %arg98, %result2_50, %1013, %1274, %arg36, %result1_16, %824, %590, %1075, %arg120, %arg37, %445, %result2_5, %arg161, %arg26, %1337, %1244, %result2_11, %result1_7, %arg100, %1223, %arg76, %arg136, %123, %arg94, %1296, %509, %result1_10, %arg116, %320, %593, %732, %arg177, %arg16, %539, %arg133, %arg49, %963, %result1_52, %arg70, %719, %arg40, %arg33, %arg174, %result2_2, %arg60, %arg53, %4, %1115, %203, %arg118, %arg65, %arg200, %result1_37, %arg30, %arg90, %1321, %456, %1329, %result1_46, %1152, %1313, %arg0, %96, %1331, %arg112, %655, %result2_35, %arg66, %arg130, %arg38, %arg194, %arg166, %1220, %333, %25, %1251, %327, %arg149, %result1_34, %516, %1300, %627, %518, %852, %arg188, %result1_4, %result2_56, %1333, %1302, %arg158, %905, %arg44, %278, %438, %211, %result2_41, %1324, %854, %arg152, %1334, %858, %arg81, %876, %488, %arg28, %result2_44, %arg181, %117, %result1_22, %1086, %arg150, %result1_49, %380, %arg145, %1315, %arg140, %1307, %5, %result2_47, %arg192, %946, %1323, %arg92, %621, %arg21, %1325, %arg42, %arg186, %result2_53, %614, %1339, %215, %981, %result2_23, %result1_28, %14, %arg138, %arg193, %arg154, %arg203, %65, %result1_43, %arg46, %666, %1055, %1309, %arg146, %result1_73, %1156, %1328, %arg182, %arg180, %1318, %740, %1299, %383, %1327, %803, %arg104, %1322, %68, %194, %1051, %1265, %1139, %arg17, %result2_38, %222, %299, %arg106, %644, %207, %result1_70, %831, %432, %170, %arg85, %938, %957, %1343, %result1, %561, %1303, %arg74, %1068, %89, %485, %arg64, %arg190, %417, %1146, %312, %arg160, %arg124, %1338, %result1_13, %result2_68, %arg164, %arg18, %425, %1341, %970, %arg68, %result2_8, %arg101, %result2_17, %arg1, %942, %1278, %arg88, %275, %411, %434, %1312, %arg128, %1314, %arg80, %749, %arg62, %1010, %1160, %1272, %1173, %1298, %arg110, %result1_55, %543, %800, %arg184, %929, %arg178, %arg113, %arg48, %698, %1287, %result1_31, %1308, %1043, %1047, %1261, %1304, %18, 
%result2_26, %arg97, %1305, %arg14, %936, %695, %arg84, %228, %760, %173, %result2_71, %arg86, %arg134, %642, %908, %result2_65, %1335, %arg122, %arg54, %arg58, %total_weight, %771, %1332, %result1_67, %1317, %arg129, %1167, %arg132, %246, %753, %arg96, %result1_64, %result2, %1330, %result1_40, %841, %arg20, %result1_25, %1320, %329, %736, %result2_14, %result1_58, %result2_29, %1148, %98, %351, %arg10, %141, %1253, %308, %arg24, %550, %arg108, %1344, %1257, %1342, %arg69, %arg201, %arg206, %413, %arg126, %833, %arg32, %arg56, %845, %623, %537, %arg142, %result1_19, %1301, %result1_1, %201, %1310, %1340, %1326, %36, %arg78, %arg196, %1336, %10, %result2_32, %1311, %648, %421, %235, %726, %arg148, %arg199, %arg176, %arg168, %1062, %1295, %1319, %arg6, %arg162, %950, %1296, %728, %959, %result2_62, %340, %1118, %result2_74, %arg117, %1282, %119, %arg114, %1180, %arg12, %106, %arg72, %1297, %arg52, %1169, %result2_59, %result1_61, %arg50, %arg172, %arg8, %631, %865, %arg144, %130, %arg34, %1064, %635, %747, %404, %1041, %arg82, %1191, %526, %102, %837, %224, %arg170, %1034, %530, %arg165, %arg22, %arg102, %arg5, %110, %1316, %1306, %arg156, %316, %522 : (!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor) -> !torch.list<!torch.tensor>
return %1345 : !torch.list<!torch.tensor>
}
// -----// IR Dump After Inliner //----- //
module attributes {torch.debug_module_name = "GraphModule"} {
func @forward(%arg0: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg1: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg2: !torch.tensor {torch.type_bound = !torch.vtensor<[512,768],f32>}, %arg3: !torch.tensor {torch.type_bound = !torch.vtensor<[2,768],f32>}, %arg4: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg5: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg6: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg7: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg8: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg9: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg10: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg11: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg12: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg13: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg14: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg15: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg16: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg17: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg18: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg19: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg20: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg21: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg22: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg23: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg24: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg25: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg26: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg27: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768],f32>}, %arg28: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg29: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg30: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg31: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg32: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg33: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg34: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg35: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg36: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg37: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg38: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg39: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg40: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg41: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg42: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg43: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg44: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg45: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg46: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg47: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg48: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg49: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg50: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg51: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg52: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg53: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg54: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg55: !torch.tensor 
{torch.type_bound = !torch.vtensor<[768],f32>}, %arg56: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg57: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg58: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg59: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg60: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg61: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg62: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg63: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg64: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg65: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg66: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg67: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg68: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg69: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg70: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg71: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg72: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg73: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg74: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg75: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg76: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg77: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg78: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg79: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg80: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg81: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg82: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg83: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg84: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg85: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg86: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg87: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg88: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg89: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg90: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg91: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg92: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg93: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg94: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg95: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg96: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg97: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg98: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg99: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg100: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg101: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg102: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg103: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg104: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg105: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg106: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg107: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg108: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg109: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg110: !torch.tensor {torch.type_bou
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment