Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created February 9, 2022 18:30
Show Gist options
  • Save pashu123/2eb1dcc5da3bccb7ca207754ec146504 to your computer and use it in GitHub Desktop.
module attributes {torch.debug_module_name = "GraphModule"} {
func private @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule.forward(%arg0: !torch.nn.Module<"__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule">, %arg1: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg2: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg3: !torch.tensor {torch.type_bound = !torch.vtensor<[512,768],f32>}, %arg4: !torch.tensor {torch.type_bound = !torch.vtensor<[2,768],f32>}, %arg5: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg6: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg7: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg8: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg9: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg10: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg11: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg12: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg13: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg14: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg15: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg16: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg17: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg18: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg19: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg20: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg21: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg22: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg23: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg24: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg25: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg26: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768],f32>}, %arg27: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg28: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg29: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg30: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg31: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg32: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg33: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg34: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg35: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg36: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg37: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg38: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg39: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg40: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg41: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg42: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg43: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg44: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg45: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg46: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg47: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg48: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg49: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg50: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg51: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg52: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg53: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg54: !torch.tensor 
{torch.type_bound = !torch.vtensor<[768],f32>}, %arg55: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg56: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg57: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg58: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg59: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg60: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg61: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg62: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg63: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg64: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg65: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg66: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg67: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg68: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg69: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg70: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg71: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg72: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg73: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg74: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg75: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg76: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg77: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg78: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg79: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg80: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg81: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg82: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg83: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg84: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg85: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg86: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg87: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg88: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg89: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg90: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg91: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg92: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg93: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg94: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg95: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg96: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg97: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg98: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg99: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg100: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg101: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg102: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg103: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg104: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg105: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg106: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg107: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg108: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg109: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768,768],f32>}, %arg110: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg111: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg112: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg113: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg114: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg115: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg116: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg117: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg118: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg119: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg120: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg121: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg122: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg123: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg124: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg125: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg126: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg127: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg128: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg129: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg130: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg131: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg132: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg133: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg134: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg135: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg136: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg137: 
!torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg138: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg139: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg140: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg141: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg142: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg143: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg144: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg145: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg146: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg147: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg148: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg149: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg150: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg151: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg152: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg153: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg154: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg155: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg156: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg157: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg158: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg159: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg160: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg161: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg162: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg163: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg164: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768],f32>}, %arg165: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg166: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg167: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg168: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg169: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg170: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg171: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg172: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg173: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg174: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg175: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg176: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg177: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg178: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg179: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg180: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg181: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg182: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg183: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg184: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg185: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg186: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg187: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg188: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg189: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg190: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg191: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, 
%arg192: !torch.tensor {torch.type_bound = !torch.vtensor<[3072],f32>}, %arg193: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg194: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg195: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg196: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg197: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg198: !torch.tensor {torch.type_bound = !torch.vtensor<[30522],f32>}, %arg199: !torch.tensor {torch.type_bound = !torch.vtensor<[30522],f32>}, %arg200: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg201: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg202: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg203: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg204: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg205: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg206: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg207: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}, %arg208: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}) -> !torch.list<!torch.tensor> {
%1 = torch.tensor.literal(dense<-0.000000e+00> : tensor<4x1x1x512xf32>) : !torch.tensor<[4,1,1,512],f32>
%2 = torch.tensor.literal(dense<8.000000e+00> : tensor<f64>) : !torch.tensor<[],f64>
%none = torch.constant.none
%false = torch.constant.bool false
%int0 = torch.constant.int 0
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int4 = torch.constant.int 4
%int512 = torch.constant.int 512
%int768 = torch.constant.int 768
%float9.999990e-13 = torch.constant.float 9.9999999999999998E-13
%float9.000000e-01 = torch.constant.float 9.000000e-01
%int2048 = torch.constant.int 2048
%int12 = torch.constant.int 12
%int64 = torch.constant.int 64
%int2 = torch.constant.int 2
%int1 = torch.constant.int 1
%int3 = torch.constant.int 3
%int-1 = torch.constant.int -1
%int-2 = torch.constant.int -2
%int48 = torch.constant.int 48
%int3072 = torch.constant.int 3072
%int30522 = torch.constant.int 30522
%int-100 = torch.constant.int -100
%3 = torch.aten.slice.Tensor %arg206, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%4 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%5 = torch.aten.expand %3, %4, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%6 = torch.aten.slice.Tensor %arg205, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%7 = torch.aten.embedding %arg5, %arg207, %int0, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%8 = torch.aten.embedding %arg4, %5, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%9 = torch.aten.add.Tensor %7, %8, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%10 = torch.aten.embedding %arg3, %6, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%11 = torch.aten.add_.Tensor %9, %10, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%12 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0, %result1, %result2 = torch.aten.native_layer_norm %11, %12, %arg2, %arg1, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%13 = torch.aten.empty_like %result0, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%14 = torch.operator "aten.bernoulli_.float"(%13, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%15 = torch.aten.div_.Scalar %14, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%16 = torch.aten.mul.Tensor %result0, %15 : !torch.tensor, !torch.tensor -> !torch.tensor
%17 = torch.aten.t %arg13 : !torch.tensor -> !torch.tensor
%18 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%19 = torch.aten.view %16, %18 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%20 = torch.aten.mm %19, %17 : !torch.tensor, !torch.tensor -> !torch.tensor
%21 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%22 = torch.operator "aten._unsafe_view"(%20, %21) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%23 = torch.aten.add_.Tensor %22, %arg12, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%24 = torch.aten.t %arg11 : !torch.tensor -> !torch.tensor
%25 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%26 = torch.aten.view %16, %25 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%27 = torch.aten.mm %26, %24 : !torch.tensor, !torch.tensor -> !torch.tensor
%28 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%29 = torch.operator "aten._unsafe_view"(%27, %28) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%30 = torch.aten.add_.Tensor %29, %arg10, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%31 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%32 = torch.aten.view %30, %31 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%33 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%34 = torch.aten.permute %32, %33 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%35 = torch.aten.t %arg15 : !torch.tensor -> !torch.tensor
%36 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%37 = torch.aten.view %16, %36 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%38 = torch.aten.mm %37, %35 : !torch.tensor, !torch.tensor -> !torch.tensor
%39 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%40 = torch.operator "aten._unsafe_view"(%38, %39) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%41 = torch.aten.add_.Tensor %40, %arg14, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%42 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%43 = torch.aten.view %41, %42 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%44 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%45 = torch.aten.permute %43, %44 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%46 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%47 = torch.aten.view %23, %46 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%48 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%49 = torch.aten.permute %47, %48 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%50 = torch.aten.transpose.int %34, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%51 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%52 = torch.aten.expand %49, %51, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%53 = torch.operator "aten.clone"(%52, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%54 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%55 = torch.operator "aten._unsafe_view"(%53, %54) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%56 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%57 = torch.aten.expand %50, %56, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%58 = torch.operator "aten.clone"(%57, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%59 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%60 = torch.operator "aten._unsafe_view"(%58, %59) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%61 = torch.aten.bmm %55, %60 : !torch.tensor, !torch.tensor -> !torch.tensor
%62 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%63 = torch.operator "aten._unsafe_view"(%61, %62) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%64 = torch.aten.div.Tensor %63, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%65 = torch.aten.add.Tensor %64, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%66 = torch.aten._softmax %65, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%67 = torch.aten.empty_like %66, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%68 = torch.operator "aten.bernoulli_.float"(%67, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%69 = torch.aten.div_.Scalar %68, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%70 = torch.aten.mul.Tensor %66, %69 : !torch.tensor, !torch.tensor -> !torch.tensor
%71 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%72 = torch.aten.expand %70, %71, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%73 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%74 = torch.aten.view %72, %73 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%75 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%76 = torch.aten.expand %45, %75, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%77 = torch.operator "aten.clone"(%76, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%78 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%79 = torch.operator "aten._unsafe_view"(%77, %78) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%80 = torch.aten.bmm %74, %79 : !torch.tensor, !torch.tensor -> !torch.tensor
%81 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%82 = torch.operator "aten._unsafe_view"(%80, %81) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%83 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%84 = torch.aten.permute %82, %83 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%85 = torch.operator "aten.clone"(%84, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%86 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%87 = torch.aten.view %85, %86 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%88 = torch.aten.t %arg9 : !torch.tensor -> !torch.tensor
%89 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%90 = torch.aten.view %87, %89 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%91 = torch.aten.mm %90, %88 : !torch.tensor, !torch.tensor -> !torch.tensor
%92 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%93 = torch.operator "aten._unsafe_view"(%91, %92) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%94 = torch.aten.add_.Tensor %93, %arg8, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%95 = torch.aten.empty_like %94, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%96 = torch.operator "aten.bernoulli_.float"(%95, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%97 = torch.aten.div_.Scalar %96, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%98 = torch.aten.mul.Tensor %94, %97 : !torch.tensor, !torch.tensor -> !torch.tensor
%99 = torch.aten.add.Tensor %98, %16, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%100 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_0, %result1_1, %result2_2 = torch.aten.native_layer_norm %99, %100, %arg7, %arg6, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%101 = torch.aten.t %arg17 : !torch.tensor -> !torch.tensor
%102 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%103 = torch.aten.view %result0_0, %102 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%104 = torch.aten.mm %103, %101 : !torch.tensor, !torch.tensor -> !torch.tensor
%105 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%106 = torch.operator "aten._unsafe_view"(%104, %105) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%107 = torch.aten.add_.Tensor %106, %arg16, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%108 = torch.aten.gelu %107 : !torch.tensor -> !torch.tensor
%109 = torch.aten.t %arg21 : !torch.tensor -> !torch.tensor
%110 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%111 = torch.aten.view %108, %110 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%112 = torch.aten.mm %111, %109 : !torch.tensor, !torch.tensor -> !torch.tensor
%113 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%114 = torch.operator "aten._unsafe_view"(%112, %113) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%115 = torch.aten.add_.Tensor %114, %arg20, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%116 = torch.aten.empty_like %115, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%117 = torch.operator "aten.bernoulli_.float"(%116, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%118 = torch.aten.div_.Scalar %117, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%119 = torch.aten.mul.Tensor %115, %118 : !torch.tensor, !torch.tensor -> !torch.tensor
%120 = torch.aten.add.Tensor %119, %result0_0, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%121 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_3, %result1_4, %result2_5 = torch.aten.native_layer_norm %120, %121, %arg19, %arg18, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Attention input projections. Each one views %result0_3 (first result of the
// native_layer_norm on the line above) as [2048, 768], multiplies it by a
// transposed weight argument, reshapes to [4, 512, 768] and adds a second
// argument in place.
// NOTE(review): sizes are read off the %intN constant names; the constants
// themselves are defined outside this excerpt - confirm there.
//
// Projection via %arg29 / %arg28 -> %128. %128 feeds the left operand of the
// score bmm further down, so this is presumably the "query" projection.
%122 = torch.aten.t %arg29 : !torch.tensor -> !torch.tensor
%123 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%124 = torch.aten.view %result0_3, %123 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%125 = torch.aten.mm %124, %122 : !torch.tensor, !torch.tensor -> !torch.tensor
%126 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%127 = torch.operator "aten._unsafe_view"(%125, %126) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%128 = torch.aten.add_.Tensor %127, %arg28, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection via %arg27 / %arg26 -> %135, then head split: view as
// [4, 512, 12, 64] and permute dims (0, 2, 1, 3) -> %139. %139 is later
// transposed to form the right operand of the score bmm (presumably "key").
%129 = torch.aten.t %arg27 : !torch.tensor -> !torch.tensor
%130 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%131 = torch.aten.view %result0_3, %130 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%132 = torch.aten.mm %131, %129 : !torch.tensor, !torch.tensor -> !torch.tensor
%133 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%134 = torch.operator "aten._unsafe_view"(%132, %133) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%135 = torch.aten.add_.Tensor %134, %arg26, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%136 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%137 = torch.aten.view %135, %136 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%138 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%139 = torch.aten.permute %137, %138 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection via %arg31 / %arg30 -> %146, same head split -> %150. %150 feeds
// the right operand of the second bmm (presumably "value").
%140 = torch.aten.t %arg31 : !torch.tensor -> !torch.tensor
%141 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%142 = torch.aten.view %result0_3, %141 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%143 = torch.aten.mm %142, %140 : !torch.tensor, !torch.tensor -> !torch.tensor
%144 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%145 = torch.operator "aten._unsafe_view"(%143, %144) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%146 = torch.aten.add_.Tensor %145, %arg30, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%147 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%148 = torch.aten.view %146, %147 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%149 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%150 = torch.aten.permute %148, %149 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Head split of the first projection: %128 -> view [4, 512, 12, 64] ->
// permute (0, 2, 1, 3) -> %154.
%151 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%152 = torch.aten.view %128, %151 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%153 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%154 = torch.aten.permute %152, %153 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Attention scores and probabilities for the projections %139 and %154.
// NOTE(review): sizes are read off the %intN constant names; the constants are
// defined outside this excerpt - confirm there.
//
// %155 is %139 with its last two dims swapped (dims -1 and -2).
%155 = torch.aten.transpose.int %139, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left bmm operand: expand %154 to [4, 12, 512, 64], clone, flatten to
// [48, 512, 64] -> %160.
%156 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%157 = torch.aten.expand %154, %156, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%158 = torch.operator "aten.clone"(%157, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%159 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%160 = torch.operator "aten._unsafe_view"(%158, %159) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right bmm operand: expand %155 to [4, 12, 64, 512], clone, flatten to
// [48, 64, 512] -> %165.
%161 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%162 = torch.aten.expand %155, %161, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%163 = torch.operator "aten.clone"(%162, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%164 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%165 = torch.operator "aten._unsafe_view"(%163, %164) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, then reshape the result back to [4, 12, 512, 512].
%166 = torch.aten.bmm %160, %165 : !torch.tensor, !torch.tensor -> !torch.tensor
%167 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%168 = torch.operator "aten._unsafe_view"(%166, %167) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by %2, a 0-d f64 tensor defined outside this excerpt (presumably the
// sqrt(head_dim) scale - TODO confirm), then add %1, a [4,1,1,512] f32 tensor
// also defined outside this excerpt (presumably the additive attention mask -
// TODO confirm), and take softmax over dim -1.
%169 = torch.aten.div.Tensor %168, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%170 = torch.aten.add.Tensor %169, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%171 = torch.aten._softmax %170, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Dropout on the probabilities: fill an empty_like buffer with Bernoulli
// samples (operand %float9.000000e-01), divide it in place by the same
// constant, and multiply the resulting mask into %171 -> %175.
%172 = torch.aten.empty_like %171, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%173 = torch.operator "aten.bernoulli_.float"(%172, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%174 = torch.aten.div_.Scalar %173, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%175 = torch.aten.mul.Tensor %171, %174 : !torch.tensor, !torch.tensor -> !torch.tensor
// Apply the (dropped-out) attention probabilities %175 to %150, merge the
// heads, run the output projection, and close the sub-block with a residual
// add and layer norm.
// NOTE(review): sizes are read off the %intN constant names; the constants are
// defined outside this excerpt - confirm there.
//
// Left bmm operand: expand %175 to [4, 12, 512, 512], view as [48, 512, 512].
%176 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%177 = torch.aten.expand %175, %176, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%178 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%179 = torch.aten.view %177, %178 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right bmm operand: expand %150 to [4, 12, 512, 64], clone, flatten to
// [48, 512, 64].
%180 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%181 = torch.aten.expand %150, %180, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%182 = torch.operator "aten.clone"(%181, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%183 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%184 = torch.operator "aten._unsafe_view"(%182, %183) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, reshape to [4, 12, 512, 64], permute (0, 2, 1, 3), clone,
// and view as [4, 512, 768] -> %192 (heads merged back into one axis).
%185 = torch.aten.bmm %179, %184 : !torch.tensor, !torch.tensor -> !torch.tensor
%186 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%187 = torch.operator "aten._unsafe_view"(%185, %186) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%188 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%189 = torch.aten.permute %187, %188 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%190 = torch.operator "aten.clone"(%189, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%191 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%192 = torch.aten.view %190, %191 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: view as [2048, 768], mm against t(%arg25), reshape to
// [4, 512, 768], in-place add of %arg24 (presumably the bias) -> %199.
%193 = torch.aten.t %arg25 : !torch.tensor -> !torch.tensor
%194 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%195 = torch.aten.view %192, %194 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%196 = torch.aten.mm %195, %193 : !torch.tensor, !torch.tensor -> !torch.tensor
%197 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%198 = torch.operator "aten._unsafe_view"(%196, %197) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%199 = torch.aten.add_.Tensor %198, %arg24, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Dropout on %199: Bernoulli mask (operand %float9.000000e-01) divided in
// place by the same constant, multiplied into %199 -> %203.
%200 = torch.aten.empty_like %199, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%201 = torch.operator "aten.bernoulli_.float"(%200, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%202 = torch.aten.div_.Scalar %201, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%203 = torch.aten.mul.Tensor %199, %202 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with the sub-block input %result0_3, then layer norm over the
// trailing [768] dim with operands %arg23, %arg22 (weight and bias, going by
// the aten native_layer_norm argument order) and eps %float9.999990e-13.
%204 = torch.aten.add.Tensor %203, %result0_3, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%205 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_6, %result1_7, %result2_8 = torch.aten.native_layer_norm %204, %205, %arg23, %arg22, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward sub-block on %result0_6: expand to width 3072, GELU, project
// back to 768, dropout, residual add, layer norm.
// NOTE(review): sizes are read off the %intN constant names; the constants are
// defined outside this excerpt - confirm there.
//
// First linear: view as [2048, 768], mm against t(%arg33), reshape to
// [4, 512, 3072], in-place add of %arg32 (presumably the bias), then GELU.
%206 = torch.aten.t %arg33 : !torch.tensor -> !torch.tensor
%207 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%208 = torch.aten.view %result0_6, %207 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%209 = torch.aten.mm %208, %206 : !torch.tensor, !torch.tensor -> !torch.tensor
%210 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%211 = torch.operator "aten._unsafe_view"(%209, %210) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%212 = torch.aten.add_.Tensor %211, %arg32, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%213 = torch.aten.gelu %212 : !torch.tensor -> !torch.tensor
// Second linear: view as [2048, 3072], mm against t(%arg37), reshape to
// [4, 512, 768], in-place add of %arg36 (presumably the bias) -> %220.
%214 = torch.aten.t %arg37 : !torch.tensor -> !torch.tensor
%215 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%216 = torch.aten.view %213, %215 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%217 = torch.aten.mm %216, %214 : !torch.tensor, !torch.tensor -> !torch.tensor
%218 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%219 = torch.operator "aten._unsafe_view"(%217, %218) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%220 = torch.aten.add_.Tensor %219, %arg36, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Dropout on %220: Bernoulli mask (operand %float9.000000e-01) divided in
// place by the same constant, multiplied into %220 -> %224.
%221 = torch.aten.empty_like %220, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%222 = torch.operator "aten.bernoulli_.float"(%221, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%223 = torch.aten.div_.Scalar %222, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%224 = torch.aten.mul.Tensor %220, %223 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with the sub-block input %result0_6, then layer norm over the
// trailing [768] dim with operands %arg35, %arg34 (weight and bias, going by
// the aten native_layer_norm argument order) and eps %float9.999990e-13.
%225 = torch.aten.add.Tensor %224, %result0_6, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%226 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_9, %result1_10, %result2_11 = torch.aten.native_layer_norm %225, %226, %arg35, %arg34, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Attention input projections. Each one views %result0_9 (first result of the
// native_layer_norm on the line above) as [2048, 768], multiplies it by a
// transposed weight argument, reshapes to [4, 512, 768] and adds a second
// argument in place.
// NOTE(review): sizes are read off the %intN constant names; the constants
// themselves are defined outside this excerpt - confirm there.
//
// Projection via %arg77 / %arg76 -> %233. %233 feeds the left operand of the
// score bmm further down, so this is presumably the "query" projection.
%227 = torch.aten.t %arg77 : !torch.tensor -> !torch.tensor
%228 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%229 = torch.aten.view %result0_9, %228 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%230 = torch.aten.mm %229, %227 : !torch.tensor, !torch.tensor -> !torch.tensor
%231 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%232 = torch.operator "aten._unsafe_view"(%230, %231) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%233 = torch.aten.add_.Tensor %232, %arg76, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection via %arg75 / %arg74 -> %240, then head split: view as
// [4, 512, 12, 64] and permute dims (0, 2, 1, 3) -> %244. %244 is later
// transposed to form the right operand of the score bmm (presumably "key").
%234 = torch.aten.t %arg75 : !torch.tensor -> !torch.tensor
%235 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%236 = torch.aten.view %result0_9, %235 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%237 = torch.aten.mm %236, %234 : !torch.tensor, !torch.tensor -> !torch.tensor
%238 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%239 = torch.operator "aten._unsafe_view"(%237, %238) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%240 = torch.aten.add_.Tensor %239, %arg74, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%241 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%242 = torch.aten.view %240, %241 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%243 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%244 = torch.aten.permute %242, %243 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection via %arg79 / %arg78 -> %251, same head split -> %255. %255 feeds
// the right operand of the second bmm (presumably "value").
%245 = torch.aten.t %arg79 : !torch.tensor -> !torch.tensor
%246 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%247 = torch.aten.view %result0_9, %246 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%248 = torch.aten.mm %247, %245 : !torch.tensor, !torch.tensor -> !torch.tensor
%249 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%250 = torch.operator "aten._unsafe_view"(%248, %249) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%251 = torch.aten.add_.Tensor %250, %arg78, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%252 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%253 = torch.aten.view %251, %252 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%254 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%255 = torch.aten.permute %253, %254 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Head split of the first projection: %233 -> view [4, 512, 12, 64] ->
// permute (0, 2, 1, 3) -> %259.
%256 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%257 = torch.aten.view %233, %256 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%258 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%259 = torch.aten.permute %257, %258 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Attention scores and probabilities for the projections %244 and %259.
// NOTE(review): sizes are read off the %intN constant names; the constants are
// defined outside this excerpt - confirm there.
//
// %260 is %244 with its last two dims swapped (dims -1 and -2).
%260 = torch.aten.transpose.int %244, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left bmm operand: expand %259 to [4, 12, 512, 64], clone, flatten to
// [48, 512, 64] -> %265.
%261 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%262 = torch.aten.expand %259, %261, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%263 = torch.operator "aten.clone"(%262, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%264 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%265 = torch.operator "aten._unsafe_view"(%263, %264) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right bmm operand: expand %260 to [4, 12, 64, 512], clone, flatten to
// [48, 64, 512] -> %270.
%266 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%267 = torch.aten.expand %260, %266, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%268 = torch.operator "aten.clone"(%267, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%269 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%270 = torch.operator "aten._unsafe_view"(%268, %269) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, then reshape the result back to [4, 12, 512, 512].
%271 = torch.aten.bmm %265, %270 : !torch.tensor, !torch.tensor -> !torch.tensor
%272 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%273 = torch.operator "aten._unsafe_view"(%271, %272) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by %2, a 0-d f64 tensor defined outside this excerpt (presumably the
// sqrt(head_dim) scale - TODO confirm), then add %1, a [4,1,1,512] f32 tensor
// also defined outside this excerpt (presumably the additive attention mask -
// TODO confirm), and take softmax over dim -1.
%274 = torch.aten.div.Tensor %273, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%275 = torch.aten.add.Tensor %274, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%276 = torch.aten._softmax %275, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Dropout on the probabilities: fill an empty_like buffer with Bernoulli
// samples (operand %float9.000000e-01), divide it in place by the same
// constant, and multiply the resulting mask into %276 -> %280.
%277 = torch.aten.empty_like %276, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%278 = torch.operator "aten.bernoulli_.float"(%277, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%279 = torch.aten.div_.Scalar %278, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%280 = torch.aten.mul.Tensor %276, %279 : !torch.tensor, !torch.tensor -> !torch.tensor
// Apply the (dropped-out) attention probabilities %280 to %255, merge the
// heads, run the output projection, and close the sub-block with a residual
// add and layer norm.
// NOTE(review): sizes are read off the %intN constant names; the constants are
// defined outside this excerpt - confirm there.
//
// Left bmm operand: expand %280 to [4, 12, 512, 512], view as [48, 512, 512].
%281 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%282 = torch.aten.expand %280, %281, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%283 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%284 = torch.aten.view %282, %283 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right bmm operand: expand %255 to [4, 12, 512, 64], clone, flatten to
// [48, 512, 64].
%285 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%286 = torch.aten.expand %255, %285, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%287 = torch.operator "aten.clone"(%286, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%288 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%289 = torch.operator "aten._unsafe_view"(%287, %288) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, reshape to [4, 12, 512, 64], permute (0, 2, 1, 3), clone,
// and view as [4, 512, 768] -> %297 (heads merged back into one axis).
%290 = torch.aten.bmm %284, %289 : !torch.tensor, !torch.tensor -> !torch.tensor
%291 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%292 = torch.operator "aten._unsafe_view"(%290, %291) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%293 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%294 = torch.aten.permute %292, %293 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%295 = torch.operator "aten.clone"(%294, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%296 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%297 = torch.aten.view %295, %296 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: view as [2048, 768], mm against t(%arg73), reshape to
// [4, 512, 768], in-place add of %arg72 (presumably the bias) -> %304.
%298 = torch.aten.t %arg73 : !torch.tensor -> !torch.tensor
%299 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%300 = torch.aten.view %297, %299 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%301 = torch.aten.mm %300, %298 : !torch.tensor, !torch.tensor -> !torch.tensor
%302 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%303 = torch.operator "aten._unsafe_view"(%301, %302) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%304 = torch.aten.add_.Tensor %303, %arg72, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Dropout on %304: Bernoulli mask (operand %float9.000000e-01) divided in
// place by the same constant, multiplied into %304 -> %308.
%305 = torch.aten.empty_like %304, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%306 = torch.operator "aten.bernoulli_.float"(%305, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%307 = torch.aten.div_.Scalar %306, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%308 = torch.aten.mul.Tensor %304, %307 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with the sub-block input %result0_9, then layer norm over the
// trailing [768] dim with operands %arg71, %arg70 (weight and bias, going by
// the aten native_layer_norm argument order) and eps %float9.999990e-13.
%309 = torch.aten.add.Tensor %308, %result0_9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%310 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_12, %result1_13, %result2_14 = torch.aten.native_layer_norm %309, %310, %arg71, %arg70, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward sub-block on %result0_12: expand to width 3072, GELU, project
// back to 768, dropout, residual add, layer norm.
// NOTE(review): sizes are read off the %intN constant names; the constants are
// defined outside this excerpt - confirm there.
//
// First linear: view as [2048, 768], mm against t(%arg81), reshape to
// [4, 512, 3072], in-place add of %arg80 (presumably the bias), then GELU.
%311 = torch.aten.t %arg81 : !torch.tensor -> !torch.tensor
%312 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%313 = torch.aten.view %result0_12, %312 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%314 = torch.aten.mm %313, %311 : !torch.tensor, !torch.tensor -> !torch.tensor
%315 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%316 = torch.operator "aten._unsafe_view"(%314, %315) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%317 = torch.aten.add_.Tensor %316, %arg80, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%318 = torch.aten.gelu %317 : !torch.tensor -> !torch.tensor
// Second linear: view as [2048, 3072], mm against t(%arg85), reshape to
// [4, 512, 768], in-place add of %arg84 (presumably the bias) -> %325.
%319 = torch.aten.t %arg85 : !torch.tensor -> !torch.tensor
%320 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%321 = torch.aten.view %318, %320 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%322 = torch.aten.mm %321, %319 : !torch.tensor, !torch.tensor -> !torch.tensor
%323 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%324 = torch.operator "aten._unsafe_view"(%322, %323) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%325 = torch.aten.add_.Tensor %324, %arg84, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Dropout on %325: Bernoulli mask (operand %float9.000000e-01) divided in
// place by the same constant, multiplied into %325 -> %329.
%326 = torch.aten.empty_like %325, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%327 = torch.operator "aten.bernoulli_.float"(%326, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%328 = torch.aten.div_.Scalar %327, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%329 = torch.aten.mul.Tensor %325, %328 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with the sub-block input %result0_12, then layer norm over the
// trailing [768] dim with operands %arg83, %arg82 (weight and bias, going by
// the aten native_layer_norm argument order) and eps %float9.999990e-13.
%330 = torch.aten.add.Tensor %329, %result0_12, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%331 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_15, %result1_16, %result2_17 = torch.aten.native_layer_norm %330, %331, %arg83, %arg82, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Attention input projections. Each one views %result0_15 (first result of
// the native_layer_norm on the line above) as [2048, 768], multiplies it by a
// transposed weight argument, reshapes to [4, 512, 768] and adds a second
// argument in place.
// NOTE(review): sizes are read off the %intN constant names; the constants
// themselves are defined outside this excerpt - confirm there.
//
// Projection via %arg93 / %arg92 -> %338. %338 feeds the left operand of the
// score bmm further down, so this is presumably the "query" projection.
%332 = torch.aten.t %arg93 : !torch.tensor -> !torch.tensor
%333 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%334 = torch.aten.view %result0_15, %333 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%335 = torch.aten.mm %334, %332 : !torch.tensor, !torch.tensor -> !torch.tensor
%336 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%337 = torch.operator "aten._unsafe_view"(%335, %336) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%338 = torch.aten.add_.Tensor %337, %arg92, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection via %arg91 / %arg90 -> %345, then head split: view as
// [4, 512, 12, 64] and permute dims (0, 2, 1, 3) -> %349. %349 is later
// transposed to form the right operand of the score bmm (presumably "key").
%339 = torch.aten.t %arg91 : !torch.tensor -> !torch.tensor
%340 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%341 = torch.aten.view %result0_15, %340 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%342 = torch.aten.mm %341, %339 : !torch.tensor, !torch.tensor -> !torch.tensor
%343 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%344 = torch.operator "aten._unsafe_view"(%342, %343) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%345 = torch.aten.add_.Tensor %344, %arg90, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%346 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%347 = torch.aten.view %345, %346 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%348 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%349 = torch.aten.permute %347, %348 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection via %arg95 / %arg94 -> %356, same head split -> %360. %360 feeds
// the right operand of the second bmm (presumably "value").
%350 = torch.aten.t %arg95 : !torch.tensor -> !torch.tensor
%351 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%352 = torch.aten.view %result0_15, %351 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%353 = torch.aten.mm %352, %350 : !torch.tensor, !torch.tensor -> !torch.tensor
%354 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%355 = torch.operator "aten._unsafe_view"(%353, %354) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%356 = torch.aten.add_.Tensor %355, %arg94, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%357 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%358 = torch.aten.view %356, %357 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%359 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%360 = torch.aten.permute %358, %359 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Head split of the first projection: %338 -> view [4, 512, 12, 64] ->
// permute (0, 2, 1, 3) -> %364.
%361 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%362 = torch.aten.view %338, %361 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%363 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%364 = torch.aten.permute %362, %363 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Attention scores and probabilities for the projections %349 and %364.
// NOTE(review): sizes are read off the %intN constant names; the constants are
// defined outside this excerpt - confirm there.
//
// %365 is %349 with its last two dims swapped (dims -1 and -2).
%365 = torch.aten.transpose.int %349, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left bmm operand: expand %364 to [4, 12, 512, 64], clone, flatten to
// [48, 512, 64] -> %370.
%366 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%367 = torch.aten.expand %364, %366, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%368 = torch.operator "aten.clone"(%367, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%369 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%370 = torch.operator "aten._unsafe_view"(%368, %369) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right bmm operand: expand %365 to [4, 12, 64, 512], clone, flatten to
// [48, 64, 512] -> %375.
%371 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%372 = torch.aten.expand %365, %371, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%373 = torch.operator "aten.clone"(%372, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%374 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%375 = torch.operator "aten._unsafe_view"(%373, %374) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, then reshape the result back to [4, 12, 512, 512].
%376 = torch.aten.bmm %370, %375 : !torch.tensor, !torch.tensor -> !torch.tensor
%377 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%378 = torch.operator "aten._unsafe_view"(%376, %377) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by %2, a 0-d f64 tensor defined outside this excerpt (presumably the
// sqrt(head_dim) scale - TODO confirm), then add %1, a [4,1,1,512] f32 tensor
// also defined outside this excerpt (presumably the additive attention mask -
// TODO confirm), and take softmax over dim -1.
%379 = torch.aten.div.Tensor %378, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%380 = torch.aten.add.Tensor %379, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%381 = torch.aten._softmax %380, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Dropout on the probabilities: fill an empty_like buffer with Bernoulli
// samples (operand %float9.000000e-01), divide it in place by the same
// constant, and multiply the resulting mask into %381 -> %385.
%382 = torch.aten.empty_like %381, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%383 = torch.operator "aten.bernoulli_.float"(%382, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%384 = torch.aten.div_.Scalar %383, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%385 = torch.aten.mul.Tensor %381, %384 : !torch.tensor, !torch.tensor -> !torch.tensor
// Apply the (dropped-out) attention probabilities %385 to %360, merge the
// heads, run the output projection, and close the sub-block with a residual
// add and layer norm.
// NOTE(review): sizes are read off the %intN constant names; the constants are
// defined outside this excerpt - confirm there.
//
// Left bmm operand: expand %385 to [4, 12, 512, 512], view as [48, 512, 512].
%386 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%387 = torch.aten.expand %385, %386, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%388 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%389 = torch.aten.view %387, %388 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right bmm operand: expand %360 to [4, 12, 512, 64], clone, flatten to
// [48, 512, 64].
%390 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%391 = torch.aten.expand %360, %390, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%392 = torch.operator "aten.clone"(%391, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%393 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%394 = torch.operator "aten._unsafe_view"(%392, %393) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Batched matmul, reshape to [4, 12, 512, 64], permute (0, 2, 1, 3), clone,
// and view as [4, 512, 768] -> %402 (heads merged back into one axis).
%395 = torch.aten.bmm %389, %394 : !torch.tensor, !torch.tensor -> !torch.tensor
%396 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%397 = torch.operator "aten._unsafe_view"(%395, %396) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%398 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%399 = torch.aten.permute %397, %398 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%400 = torch.operator "aten.clone"(%399, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%401 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%402 = torch.aten.view %400, %401 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: view as [2048, 768], mm against t(%arg89), reshape to
// [4, 512, 768], in-place add of %arg88 (presumably the bias) -> %409.
%403 = torch.aten.t %arg89 : !torch.tensor -> !torch.tensor
%404 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%405 = torch.aten.view %402, %404 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%406 = torch.aten.mm %405, %403 : !torch.tensor, !torch.tensor -> !torch.tensor
%407 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%408 = torch.operator "aten._unsafe_view"(%406, %407) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%409 = torch.aten.add_.Tensor %408, %arg88, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Dropout on %409: Bernoulli mask (operand %float9.000000e-01) divided in
// place by the same constant, multiplied into %409 -> %413.
%410 = torch.aten.empty_like %409, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%411 = torch.operator "aten.bernoulli_.float"(%410, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%412 = torch.aten.div_.Scalar %411, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%413 = torch.aten.mul.Tensor %409, %412 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with the sub-block input %result0_15, then layer norm over the
// trailing [768] dim with operands %arg87, %arg86 (weight and bias, going by
// the aten native_layer_norm argument order) and eps %float9.999990e-13.
%414 = torch.aten.add.Tensor %413, %result0_15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%415 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_18, %result1_19, %result2_20 = torch.aten.native_layer_norm %414, %415, %arg87, %arg86, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward sub-block on %result0_18: expand to width 3072, GELU, project
// back to 768, dropout, residual add, layer norm.
// NOTE(review): sizes are read off the %intN constant names; the constants are
// defined outside this excerpt - confirm there.
//
// First linear: view as [2048, 768], mm against t(%arg97), reshape to
// [4, 512, 3072], in-place add of %arg96 (presumably the bias), then GELU.
%416 = torch.aten.t %arg97 : !torch.tensor -> !torch.tensor
%417 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%418 = torch.aten.view %result0_18, %417 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%419 = torch.aten.mm %418, %416 : !torch.tensor, !torch.tensor -> !torch.tensor
%420 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%421 = torch.operator "aten._unsafe_view"(%419, %420) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%422 = torch.aten.add_.Tensor %421, %arg96, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%423 = torch.aten.gelu %422 : !torch.tensor -> !torch.tensor
// Second linear: view as [2048, 3072], mm against t(%arg101), reshape to
// [4, 512, 768], in-place add of %arg100 (presumably the bias) -> %430.
%424 = torch.aten.t %arg101 : !torch.tensor -> !torch.tensor
%425 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%426 = torch.aten.view %423, %425 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%427 = torch.aten.mm %426, %424 : !torch.tensor, !torch.tensor -> !torch.tensor
%428 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%429 = torch.operator "aten._unsafe_view"(%427, %428) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%430 = torch.aten.add_.Tensor %429, %arg100, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Dropout on %430: Bernoulli mask (operand %float9.000000e-01) divided in
// place by the same constant, multiplied into %430 -> %434.
%431 = torch.aten.empty_like %430, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%432 = torch.operator "aten.bernoulli_.float"(%431, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%433 = torch.aten.div_.Scalar %432, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%434 = torch.aten.mul.Tensor %430, %433 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual add with the sub-block input %result0_18, then layer norm over the
// trailing [768] dim with operands %arg99, %arg98 (weight and bias, going by
// the aten native_layer_norm argument order) and eps %float9.999990e-13.
%435 = torch.aten.add.Tensor %434, %result0_18, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%436 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_21, %result1_22, %result2_23 = torch.aten.native_layer_norm %435, %436, %arg99, %arg98, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three separate matmul projections of %result0_21, two batched matmuls with a softmax in between,
// an output projection, and a residual add + layer norm.
// NOTE(review): looks like a 12-head self-attention block; the query/key/value labels below are
// inferred from each tensor's position in the bmm ops — confirm against the originating model.
// Projection A (presumably the query): transpose %arg109, view %result0_21 as [%int2048, %int768],
// matrix-multiply, reshape to [%int4, %int512, %int768], add %arg108 in place -> %443.
%437 = torch.aten.t %arg109 : !torch.tensor -> !torch.tensor
%438 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%439 = torch.aten.view %result0_21, %438 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%440 = torch.aten.mm %439, %437 : !torch.tensor, !torch.tensor -> !torch.tensor
%441 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%442 = torch.operator "aten._unsafe_view"(%440, %441) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%443 = torch.aten.add_.Tensor %442, %arg108, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection B (presumably the key): same sequence using %arg107 and %arg106 -> %450.
%444 = torch.aten.t %arg107 : !torch.tensor -> !torch.tensor
%445 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%446 = torch.aten.view %result0_21, %445 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%447 = torch.aten.mm %446, %444 : !torch.tensor, !torch.tensor -> !torch.tensor
%448 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%449 = torch.operator "aten._unsafe_view"(%447, %448) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%450 = torch.aten.add_.Tensor %449, %arg106, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// View %450 as [%int4, %int512, %int12, %int64] and permute its dims with [%int0, %int2, %int1, %int3].
%451 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%452 = torch.aten.view %450, %451 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%453 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%454 = torch.aten.permute %452, %453 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection C (presumably the value): same sequence using %arg111 and %arg110 -> %461.
%455 = torch.aten.t %arg111 : !torch.tensor -> !torch.tensor
%456 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%457 = torch.aten.view %result0_21, %456 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%458 = torch.aten.mm %457, %455 : !torch.tensor, !torch.tensor -> !torch.tensor
%459 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%460 = torch.operator "aten._unsafe_view"(%458, %459) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%461 = torch.aten.add_.Tensor %460, %arg110, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Apply the same view + permute to %461 (-> %465) and to %443 (-> %469).
%462 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%463 = torch.aten.view %461, %462 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%464 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%465 = torch.aten.permute %463, %464 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%466 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%467 = torch.aten.view %443, %466 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%468 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%469 = torch.aten.permute %467, %468 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dims of %454 so it can be the right-hand operand of the first bmm.
%470 = torch.aten.transpose.int %454, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left bmm operand: expand %469 to [%int4, %int12, %int512, %int64], clone it, and reshape the
// clone to [%int48, %int512, %int64].
%471 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%472 = torch.aten.expand %469, %471, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%473 = torch.operator "aten.clone"(%472, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%474 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%475 = torch.operator "aten._unsafe_view"(%473, %474) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right bmm operand: expand %470 to [%int4, %int12, %int64, %int512], clone it, and reshape the
// clone to [%int48, %int64, %int512].
%476 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%477 = torch.aten.expand %470, %476, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%478 = torch.operator "aten.clone"(%477, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%479 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%480 = torch.operator "aten._unsafe_view"(%478, %479) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// First batched matmul; the product is reshaped to [%int4, %int12, %int512, %int512].
%481 = torch.aten.bmm %475, %480 : !torch.tensor, !torch.tensor -> !torch.tensor
%482 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%483 = torch.operator "aten._unsafe_view"(%481, %482) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by %2 (a rank-0 f64 tensor), add %1 (a [4,1,1,512] f32 tensor), and take _softmax along
// dim %int-1 with %false as its third operand.
// NOTE(review): %2 is presumably the score scaling factor and %1 an additive attention mask — confirm.
%484 = torch.aten.div.Tensor %483, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%485 = torch.aten.add.Tensor %484, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%486 = torch.aten._softmax %485, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask built from empty_like + in-place bernoulli_ + in-place div_, multiplied into %486.
// NOTE(review): looks like inverted dropout with keep probability 0.9 — confirm.
%487 = torch.aten.empty_like %486, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%488 = torch.operator "aten.bernoulli_.float"(%487, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%489 = torch.aten.div_.Scalar %488, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%490 = torch.aten.mul.Tensor %486, %489 : !torch.tensor, !torch.tensor -> !torch.tensor
// Left operand of the second bmm: expand %490 to [%int4, %int12, %int512, %int512] and view it as
// [%int48, %int512, %int512].
%491 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%492 = torch.aten.expand %490, %491, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%493 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%494 = torch.aten.view %492, %493 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right operand of the second bmm: expand %465 to [%int4, %int12, %int512, %int64], clone it, and
// reshape the clone to [%int48, %int512, %int64].
%495 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%496 = torch.aten.expand %465, %495, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%497 = torch.operator "aten.clone"(%496, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%498 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%499 = torch.operator "aten._unsafe_view"(%497, %498) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Second batched matmul; the product is reshaped to [%int4, %int12, %int512, %int64].
%500 = torch.aten.bmm %494, %499 : !torch.tensor, !torch.tensor -> !torch.tensor
%501 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%502 = torch.operator "aten._unsafe_view"(%500, %501) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Permute with [%int0, %int2, %int1, %int3], clone, and view the clone as [%int4, %int512, %int768].
%503 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%504 = torch.aten.permute %502, %503 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%505 = torch.operator "aten.clone"(%504, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%506 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%507 = torch.aten.view %505, %506 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: transpose %arg105, view %507 as [%int2048, %int768], matrix-multiply, reshape
// to [%int4, %int512, %int768], and add %arg104 in place.
%508 = torch.aten.t %arg105 : !torch.tensor -> !torch.tensor
%509 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%510 = torch.aten.view %507, %509 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%511 = torch.aten.mm %510, %508 : !torch.tensor, !torch.tensor -> !torch.tensor
%512 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%513 = torch.operator "aten._unsafe_view"(%511, %512) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%514 = torch.aten.add_.Tensor %513, %arg104, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Same random-mask sequence as above, multiplied into %514.
%515 = torch.aten.empty_like %514, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%516 = torch.operator "aten.bernoulli_.float"(%515, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%517 = torch.aten.div_.Scalar %516, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%518 = torch.aten.mul.Tensor %514, %517 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add the block input %result0_21 back onto the masked result, then run native_layer_norm over
// [%int768] with %arg103 and %arg102 as its third and fourth operands.
%519 = torch.aten.add.Tensor %518, %result0_21, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%520 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %519, %520, %arg103, %arg102, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Two matmul stages applied to %result0_24 with a gelu in between.
// NOTE(review): looks like a transformer feed-forward block with %arg113/%arg112 and
// %arg117/%arg116 as weight/bias pairs — confirm.
// Stage 1: transpose %arg113, view %result0_24 as [%int2048, %int768], matrix-multiply, reshape the
// product to [%int4, %int512, %int3072], add %arg112 in place, and apply gelu.
%521 = torch.aten.t %arg113 : !torch.tensor -> !torch.tensor
%522 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%523 = torch.aten.view %result0_24, %522 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%524 = torch.aten.mm %523, %521 : !torch.tensor, !torch.tensor -> !torch.tensor
%525 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%526 = torch.operator "aten._unsafe_view"(%524, %525) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%527 = torch.aten.add_.Tensor %526, %arg112, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%528 = torch.aten.gelu %527 : !torch.tensor -> !torch.tensor
// Stage 2: transpose %arg117, view the gelu output as [%int2048, %int3072], matrix-multiply, reshape
// the product to [%int4, %int512, %int768], and add %arg116 in place.
%529 = torch.aten.t %arg117 : !torch.tensor -> !torch.tensor
%530 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%531 = torch.aten.view %528, %530 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%532 = torch.aten.mm %531, %529 : !torch.tensor, !torch.tensor -> !torch.tensor
%533 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%534 = torch.operator "aten._unsafe_view"(%532, %533) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%535 = torch.aten.add_.Tensor %534, %arg116, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask built from empty_like + in-place bernoulli_ + in-place div_, multiplied into %535.
// NOTE(review): looks like inverted dropout with keep probability 0.9 — confirm.
%536 = torch.aten.empty_like %535, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%537 = torch.operator "aten.bernoulli_.float"(%536, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%538 = torch.aten.div_.Scalar %537, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%539 = torch.aten.mul.Tensor %535, %538 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add the block input %result0_24 back onto the masked result, then run native_layer_norm over
// [%int768] with %arg115 and %arg114 as its third and fourth operands.
%540 = torch.aten.add.Tensor %539, %result0_24, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%541 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_27, %result1_28, %result2_29 = torch.aten.native_layer_norm %540, %541, %arg115, %arg114, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three separate matmul projections of %result0_27, two batched matmuls with a softmax in between,
// an output projection, and a residual add + layer norm.
// NOTE(review): looks like a 12-head self-attention block; the query/key/value labels below are
// inferred from each tensor's position in the bmm ops — confirm against the originating model.
// Projection A (presumably the query): transpose %arg125, view %result0_27 as [%int2048, %int768],
// matrix-multiply, reshape to [%int4, %int512, %int768], add %arg124 in place -> %548.
%542 = torch.aten.t %arg125 : !torch.tensor -> !torch.tensor
%543 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%544 = torch.aten.view %result0_27, %543 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%545 = torch.aten.mm %544, %542 : !torch.tensor, !torch.tensor -> !torch.tensor
%546 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%547 = torch.operator "aten._unsafe_view"(%545, %546) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%548 = torch.aten.add_.Tensor %547, %arg124, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection B (presumably the key): same sequence using %arg123 and %arg122 -> %555.
%549 = torch.aten.t %arg123 : !torch.tensor -> !torch.tensor
%550 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%551 = torch.aten.view %result0_27, %550 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%552 = torch.aten.mm %551, %549 : !torch.tensor, !torch.tensor -> !torch.tensor
%553 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%554 = torch.operator "aten._unsafe_view"(%552, %553) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%555 = torch.aten.add_.Tensor %554, %arg122, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// View %555 as [%int4, %int512, %int12, %int64] and permute its dims with [%int0, %int2, %int1, %int3].
%556 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%557 = torch.aten.view %555, %556 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%558 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%559 = torch.aten.permute %557, %558 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection C (presumably the value): same sequence using %arg127 and %arg126 -> %566.
%560 = torch.aten.t %arg127 : !torch.tensor -> !torch.tensor
%561 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%562 = torch.aten.view %result0_27, %561 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%563 = torch.aten.mm %562, %560 : !torch.tensor, !torch.tensor -> !torch.tensor
%564 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%565 = torch.operator "aten._unsafe_view"(%563, %564) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%566 = torch.aten.add_.Tensor %565, %arg126, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Apply the same view + permute to %566 (-> %570) and to %548 (-> %574).
%567 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%568 = torch.aten.view %566, %567 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%569 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%570 = torch.aten.permute %568, %569 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%571 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%572 = torch.aten.view %548, %571 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%573 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%574 = torch.aten.permute %572, %573 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dims of %559 so it can be the right-hand operand of the first bmm.
%575 = torch.aten.transpose.int %559, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left bmm operand: expand %574 to [%int4, %int12, %int512, %int64], clone it, and reshape the
// clone to [%int48, %int512, %int64].
%576 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%577 = torch.aten.expand %574, %576, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%578 = torch.operator "aten.clone"(%577, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%579 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%580 = torch.operator "aten._unsafe_view"(%578, %579) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right bmm operand: expand %575 to [%int4, %int12, %int64, %int512], clone it, and reshape the
// clone to [%int48, %int64, %int512].
%581 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%582 = torch.aten.expand %575, %581, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%583 = torch.operator "aten.clone"(%582, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%584 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%585 = torch.operator "aten._unsafe_view"(%583, %584) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// First batched matmul; the product is reshaped to [%int4, %int12, %int512, %int512].
%586 = torch.aten.bmm %580, %585 : !torch.tensor, !torch.tensor -> !torch.tensor
%587 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%588 = torch.operator "aten._unsafe_view"(%586, %587) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by %2 (a rank-0 f64 tensor), add %1 (a [4,1,1,512] f32 tensor), and take _softmax along
// dim %int-1 with %false as its third operand.
// NOTE(review): %2 is presumably the score scaling factor and %1 an additive attention mask — confirm.
%589 = torch.aten.div.Tensor %588, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%590 = torch.aten.add.Tensor %589, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%591 = torch.aten._softmax %590, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask built from empty_like + in-place bernoulli_ + in-place div_, multiplied into %591.
// NOTE(review): looks like inverted dropout with keep probability 0.9 — confirm.
%592 = torch.aten.empty_like %591, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%593 = torch.operator "aten.bernoulli_.float"(%592, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%594 = torch.aten.div_.Scalar %593, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%595 = torch.aten.mul.Tensor %591, %594 : !torch.tensor, !torch.tensor -> !torch.tensor
// Left operand of the second bmm: expand %595 to [%int4, %int12, %int512, %int512] and view it as
// [%int48, %int512, %int512].
%596 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%597 = torch.aten.expand %595, %596, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%598 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%599 = torch.aten.view %597, %598 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right operand of the second bmm: expand %570 to [%int4, %int12, %int512, %int64], clone it, and
// reshape the clone to [%int48, %int512, %int64].
%600 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%601 = torch.aten.expand %570, %600, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%602 = torch.operator "aten.clone"(%601, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%603 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%604 = torch.operator "aten._unsafe_view"(%602, %603) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Second batched matmul; the product is reshaped to [%int4, %int12, %int512, %int64].
%605 = torch.aten.bmm %599, %604 : !torch.tensor, !torch.tensor -> !torch.tensor
%606 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%607 = torch.operator "aten._unsafe_view"(%605, %606) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Permute with [%int0, %int2, %int1, %int3], clone, and view the clone as [%int4, %int512, %int768].
%608 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%609 = torch.aten.permute %607, %608 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%610 = torch.operator "aten.clone"(%609, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%611 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%612 = torch.aten.view %610, %611 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: transpose %arg121, view %612 as [%int2048, %int768], matrix-multiply, reshape
// to [%int4, %int512, %int768], and add %arg120 in place.
%613 = torch.aten.t %arg121 : !torch.tensor -> !torch.tensor
%614 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%615 = torch.aten.view %612, %614 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%616 = torch.aten.mm %615, %613 : !torch.tensor, !torch.tensor -> !torch.tensor
%617 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%618 = torch.operator "aten._unsafe_view"(%616, %617) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%619 = torch.aten.add_.Tensor %618, %arg120, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Same random-mask sequence as above, multiplied into %619.
%620 = torch.aten.empty_like %619, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%621 = torch.operator "aten.bernoulli_.float"(%620, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%622 = torch.aten.div_.Scalar %621, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%623 = torch.aten.mul.Tensor %619, %622 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add the block input %result0_27 back onto the masked result, then run native_layer_norm over
// [%int768] with %arg119 and %arg118 as its third and fourth operands.
%624 = torch.aten.add.Tensor %623, %result0_27, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%625 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_30, %result1_31, %result2_32 = torch.aten.native_layer_norm %624, %625, %arg119, %arg118, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Two matmul stages applied to %result0_30 with a gelu in between.
// NOTE(review): looks like a transformer feed-forward block with %arg129/%arg128 and
// %arg133/%arg132 as weight/bias pairs — confirm.
// Stage 1: transpose %arg129, view %result0_30 as [%int2048, %int768], matrix-multiply, reshape the
// product to [%int4, %int512, %int3072], add %arg128 in place, and apply gelu.
%626 = torch.aten.t %arg129 : !torch.tensor -> !torch.tensor
%627 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%628 = torch.aten.view %result0_30, %627 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%629 = torch.aten.mm %628, %626 : !torch.tensor, !torch.tensor -> !torch.tensor
%630 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%631 = torch.operator "aten._unsafe_view"(%629, %630) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%632 = torch.aten.add_.Tensor %631, %arg128, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%633 = torch.aten.gelu %632 : !torch.tensor -> !torch.tensor
// Stage 2: transpose %arg133, view the gelu output as [%int2048, %int3072], matrix-multiply, reshape
// the product to [%int4, %int512, %int768], and add %arg132 in place.
%634 = torch.aten.t %arg133 : !torch.tensor -> !torch.tensor
%635 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%636 = torch.aten.view %633, %635 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%637 = torch.aten.mm %636, %634 : !torch.tensor, !torch.tensor -> !torch.tensor
%638 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%639 = torch.operator "aten._unsafe_view"(%637, %638) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%640 = torch.aten.add_.Tensor %639, %arg132, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask built from empty_like + in-place bernoulli_ + in-place div_, multiplied into %640.
// NOTE(review): looks like inverted dropout with keep probability 0.9 — confirm.
%641 = torch.aten.empty_like %640, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%642 = torch.operator "aten.bernoulli_.float"(%641, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%643 = torch.aten.div_.Scalar %642, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%644 = torch.aten.mul.Tensor %640, %643 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add the block input %result0_30 back onto the masked result, then run native_layer_norm over
// [%int768] with %arg131 and %arg130 as its third and fourth operands.
%645 = torch.aten.add.Tensor %644, %result0_30, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%646 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_33, %result1_34, %result2_35 = torch.aten.native_layer_norm %645, %646, %arg131, %arg130, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Three separate matmul projections of %result0_33, two batched matmuls with a softmax in between,
// and the start of an output projection; the ops that consume %726 lie beyond this span.
// NOTE(review): looks like a 12-head self-attention block; the query/key/value labels below are
// inferred from each tensor's position in the bmm ops — confirm against the originating model.
// Projection A (presumably the query): transpose %arg141, view %result0_33 as [%int2048, %int768],
// matrix-multiply, reshape to [%int4, %int512, %int768], add %arg140 in place -> %653.
%647 = torch.aten.t %arg141 : !torch.tensor -> !torch.tensor
%648 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%649 = torch.aten.view %result0_33, %648 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%650 = torch.aten.mm %649, %647 : !torch.tensor, !torch.tensor -> !torch.tensor
%651 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%652 = torch.operator "aten._unsafe_view"(%650, %651) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%653 = torch.aten.add_.Tensor %652, %arg140, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection B (presumably the key): same sequence using %arg139 and %arg138 -> %660.
%654 = torch.aten.t %arg139 : !torch.tensor -> !torch.tensor
%655 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%656 = torch.aten.view %result0_33, %655 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%657 = torch.aten.mm %656, %654 : !torch.tensor, !torch.tensor -> !torch.tensor
%658 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%659 = torch.operator "aten._unsafe_view"(%657, %658) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%660 = torch.aten.add_.Tensor %659, %arg138, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// View %660 as [%int4, %int512, %int12, %int64] and permute its dims with [%int0, %int2, %int1, %int3].
%661 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%662 = torch.aten.view %660, %661 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%663 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%664 = torch.aten.permute %662, %663 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection C (presumably the value): same sequence using %arg143 and %arg142 -> %671.
%665 = torch.aten.t %arg143 : !torch.tensor -> !torch.tensor
%666 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%667 = torch.aten.view %result0_33, %666 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%668 = torch.aten.mm %667, %665 : !torch.tensor, !torch.tensor -> !torch.tensor
%669 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%670 = torch.operator "aten._unsafe_view"(%668, %669) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%671 = torch.aten.add_.Tensor %670, %arg142, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Apply the same view + permute to %671 (-> %675) and to %653 (-> %679).
%672 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%673 = torch.aten.view %671, %672 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%674 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%675 = torch.aten.permute %673, %674 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%676 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%677 = torch.aten.view %653, %676 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%678 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%679 = torch.aten.permute %677, %678 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dims of %664 so it can be the right-hand operand of the first bmm.
%680 = torch.aten.transpose.int %664, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left bmm operand: expand %679 to [%int4, %int12, %int512, %int64], clone it, and reshape the
// clone to [%int48, %int512, %int64].
%681 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%682 = torch.aten.expand %679, %681, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%683 = torch.operator "aten.clone"(%682, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%684 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%685 = torch.operator "aten._unsafe_view"(%683, %684) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right bmm operand: expand %680 to [%int4, %int12, %int64, %int512], clone it, and reshape the
// clone to [%int48, %int64, %int512].
%686 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%687 = torch.aten.expand %680, %686, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%688 = torch.operator "aten.clone"(%687, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%689 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%690 = torch.operator "aten._unsafe_view"(%688, %689) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// First batched matmul; the product is reshaped to [%int4, %int12, %int512, %int512].
%691 = torch.aten.bmm %685, %690 : !torch.tensor, !torch.tensor -> !torch.tensor
%692 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%693 = torch.operator "aten._unsafe_view"(%691, %692) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by %2 (a rank-0 f64 tensor), add %1 (a [4,1,1,512] f32 tensor), and take _softmax along
// dim %int-1 with %false as its third operand.
// NOTE(review): %2 is presumably the score scaling factor and %1 an additive attention mask — confirm.
%694 = torch.aten.div.Tensor %693, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%695 = torch.aten.add.Tensor %694, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%696 = torch.aten._softmax %695, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask built from empty_like + in-place bernoulli_ + in-place div_, multiplied into %696.
// NOTE(review): looks like inverted dropout with keep probability 0.9 — confirm.
%697 = torch.aten.empty_like %696, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%698 = torch.operator "aten.bernoulli_.float"(%697, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%699 = torch.aten.div_.Scalar %698, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%700 = torch.aten.mul.Tensor %696, %699 : !torch.tensor, !torch.tensor -> !torch.tensor
// Left operand of the second bmm: expand %700 to [%int4, %int12, %int512, %int512] and view it as
// [%int48, %int512, %int512].
%701 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%702 = torch.aten.expand %700, %701, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%703 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%704 = torch.aten.view %702, %703 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Right operand of the second bmm: expand %675 to [%int4, %int12, %int512, %int64], clone it, and
// reshape the clone to [%int48, %int512, %int64].
%705 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%706 = torch.aten.expand %675, %705, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%707 = torch.operator "aten.clone"(%706, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%708 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%709 = torch.operator "aten._unsafe_view"(%707, %708) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Second batched matmul; the product is reshaped to [%int4, %int12, %int512, %int64].
%710 = torch.aten.bmm %704, %709 : !torch.tensor, !torch.tensor -> !torch.tensor
%711 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%712 = torch.operator "aten._unsafe_view"(%710, %711) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Permute with [%int0, %int2, %int1, %int3], clone, and view the clone as [%int4, %int512, %int768].
%713 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%714 = torch.aten.permute %712, %713 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%715 = torch.operator "aten.clone"(%714, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%716 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%717 = torch.aten.view %715, %716 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: transpose %arg137, view %717 as [%int2048, %int768], matrix-multiply, reshape
// to [%int4, %int512, %int768], and add %arg136 in place.
%718 = torch.aten.t %arg137 : !torch.tensor -> !torch.tensor
%719 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%720 = torch.aten.view %717, %719 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%721 = torch.aten.mm %720, %718 : !torch.tensor, !torch.tensor -> !torch.tensor
%722 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%723 = torch.operator "aten._unsafe_view"(%721, %722) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%724 = torch.aten.add_.Tensor %723, %arg136, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Start of another random mask: allocate a tensor like %724 and fill it in place with bernoulli_.
// The ops that use %726 are not visible in this span.
%725 = torch.aten.empty_like %724, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%726 = torch.operator "aten.bernoulli_.float"(%725, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%727 = torch.aten.div_.Scalar %726, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%728 = torch.aten.mul.Tensor %724, %727 : !torch.tensor, !torch.tensor -> !torch.tensor
%729 = torch.aten.add.Tensor %728, %result0_33, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%730 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_36, %result1_37, %result2_38 = torch.aten.native_layer_norm %729, %730, %arg135, %arg134, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// ---- Feed-forward sub-layer on %result0_36, producing %result0_39 ----
// NOTE(review): shape remarks assume each %intN constant (defined outside this excerpt) holds
// the value N - TODO confirm against the constant definitions.
// First linear: view as [2048,768], mm with the transpose of %arg145, view as [4,512,3072],
// in-place add of %arg144, then GELU.
%731 = torch.aten.t %arg145 : !torch.tensor -> !torch.tensor
%732 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%733 = torch.aten.view %result0_36, %732 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%734 = torch.aten.mm %733, %731 : !torch.tensor, !torch.tensor -> !torch.tensor
%735 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%736 = torch.operator "aten._unsafe_view"(%734, %735) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%737 = torch.aten.add_.Tensor %736, %arg144, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%738 = torch.aten.gelu %737 : !torch.tensor -> !torch.tensor
// Second linear: view as [2048,3072], mm with the transpose of %arg149, view as [4,512,768],
// in-place add of %arg148.
%739 = torch.aten.t %arg149 : !torch.tensor -> !torch.tensor
%740 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%741 = torch.aten.view %738, %740 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%742 = torch.aten.mm %741, %739 : !torch.tensor, !torch.tensor -> !torch.tensor
%743 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%744 = torch.operator "aten._unsafe_view"(%742, %743) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%745 = torch.aten.add_.Tensor %744, %arg148, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Dropout-style masking: a fresh buffer shaped like %745 is filled in place by bernoulli_ with
// %float9.000000e-01, divided in place by the same constant, and multiplied into %745.
%746 = torch.aten.empty_like %745, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%747 = torch.operator "aten.bernoulli_.float"(%746, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%748 = torch.aten.div_.Scalar %747, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%749 = torch.aten.mul.Tensor %745, %748 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add the sub-layer input %result0_36 back (skip connection), then layer-normalize over the
// trailing [768] dimension; %arg147 / %arg146 occupy the weight / bias operand slots.
%750 = torch.aten.add.Tensor %749, %result0_36, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%751 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_39, %result1_40, %result2_41 = torch.aten.native_layer_norm %750, %751, %arg147, %arg146, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// ---- Attention sub-layer on %result0_39, producing %result0_42 ----
// NOTE(review): %1, %2 and every %int*/%float*/%none/%false constant are defined outside this
// excerpt. Shape remarks assume each %intN constant holds the value N - TODO confirm.
// Projection A: %result0_39 viewed as [2048,768], mm with t(%arg157), view as [4,512,768],
// in-place add of %arg156 -> %758. %758 later feeds the LEFT operand of the score bmm
// (the query role in the usual attention formulation).
%752 = torch.aten.t %arg157 : !torch.tensor -> !torch.tensor
%753 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%754 = torch.aten.view %result0_39, %753 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%755 = torch.aten.mm %754, %752 : !torch.tensor, !torch.tensor -> !torch.tensor
%756 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%757 = torch.operator "aten._unsafe_view"(%755, %756) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%758 = torch.aten.add_.Tensor %757, %arg156, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection B: same pattern with %arg155 / %arg154, then viewed as [4,512,12,64] and permuted
// with order (0,2,1,3) -> %769. %769 is transposed below and feeds the RIGHT operand of the
// score bmm (the key role).
%759 = torch.aten.t %arg155 : !torch.tensor -> !torch.tensor
%760 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%761 = torch.aten.view %result0_39, %760 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%762 = torch.aten.mm %761, %759 : !torch.tensor, !torch.tensor -> !torch.tensor
%763 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%764 = torch.operator "aten._unsafe_view"(%762, %763) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%765 = torch.aten.add_.Tensor %764, %arg154, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%766 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%767 = torch.aten.view %765, %766 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%768 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%769 = torch.aten.permute %767, %768 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection C: same pattern with %arg159 / %arg158 -> %780. %780 is later multiplied by the
// attention probabilities (the value role).
%770 = torch.aten.t %arg159 : !torch.tensor -> !torch.tensor
%771 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%772 = torch.aten.view %result0_39, %771 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%773 = torch.aten.mm %772, %770 : !torch.tensor, !torch.tensor -> !torch.tensor
%774 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%775 = torch.operator "aten._unsafe_view"(%773, %774) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%776 = torch.aten.add_.Tensor %775, %arg158, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%777 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%778 = torch.aten.view %776, %777 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%779 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%780 = torch.aten.permute %778, %779 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Split projection A (%758) the same way: view as [4,512,12,64], permute (0,2,1,3) -> %784.
%781 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%782 = torch.aten.view %758, %781 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%783 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%784 = torch.aten.permute %782, %783 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dims of %769 so the score bmm contracts over the size-64 dimension.
%785 = torch.aten.transpose.int %769, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left bmm operand: %784 expanded, cloned, and viewed as [48,512,64].
%786 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%787 = torch.aten.expand %784, %786, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%788 = torch.operator "aten.clone"(%787, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%789 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%790 = torch.operator "aten._unsafe_view"(%788, %789) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right bmm operand: %785 expanded, cloned, and viewed as [48,64,512].
%791 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%792 = torch.aten.expand %785, %791, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%793 = torch.operator "aten.clone"(%792, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%794 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%795 = torch.operator "aten._unsafe_view"(%793, %794) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Attention scores, viewed as [4,12,512,512].
%796 = torch.aten.bmm %790, %795 : !torch.tensor, !torch.tensor -> !torch.tensor
%797 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%798 = torch.operator "aten._unsafe_view"(%796, %797) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by the 0-d f64 tensor %2, add %1 (typed [4,1,1,512]), softmax along dim %int-1.
// NOTE(review): %1 is presumably the additive attention mask and %2 the score scale - confirm at their definitions.
%799 = torch.aten.div.Tensor %798, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%800 = torch.aten.add.Tensor %799, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%801 = torch.aten._softmax %800, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Dropout-style masking: a fresh buffer shaped like %801 is filled in place by bernoulli_ with
// %float9.000000e-01, divided in place by the same constant, and multiplied into the probabilities.
%802 = torch.aten.empty_like %801, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%803 = torch.operator "aten.bernoulli_.float"(%802, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%804 = torch.aten.div_.Scalar %803, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%805 = torch.aten.mul.Tensor %801, %804 : !torch.tensor, !torch.tensor -> !torch.tensor
// Flatten the probabilities to [48,512,512] and projection C (%780) to [48,512,64].
%806 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%807 = torch.aten.expand %805, %806, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%808 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%809 = torch.aten.view %807, %808 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%810 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%811 = torch.aten.expand %780, %810, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%812 = torch.operator "aten.clone"(%811, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%813 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%814 = torch.operator "aten._unsafe_view"(%812, %813) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Context: probabilities x %814, viewed as [4,12,512,64], permuted (0,2,1,3), cloned, and
// viewed as [4,512,768].
%815 = torch.aten.bmm %809, %814 : !torch.tensor, !torch.tensor -> !torch.tensor
%816 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%817 = torch.operator "aten._unsafe_view"(%815, %816) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%818 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%819 = torch.aten.permute %817, %818 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%820 = torch.operator "aten.clone"(%819, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%821 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%822 = torch.aten.view %820, %821 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: view as [2048,768], mm with t(%arg153), view as [4,512,768], in-place add of %arg152.
%823 = torch.aten.t %arg153 : !torch.tensor -> !torch.tensor
%824 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%825 = torch.aten.view %822, %824 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%826 = torch.aten.mm %825, %823 : !torch.tensor, !torch.tensor -> !torch.tensor
%827 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%828 = torch.operator "aten._unsafe_view"(%826, %827) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%829 = torch.aten.add_.Tensor %828, %arg152, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Same dropout-style masking sequence, applied to the projected output %829.
%830 = torch.aten.empty_like %829, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%831 = torch.operator "aten.bernoulli_.float"(%830, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%832 = torch.aten.div_.Scalar %831, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%833 = torch.aten.mul.Tensor %829, %832 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add the sub-layer input %result0_39 back (skip connection), then layer-normalize over the
// trailing [768] dimension; %arg151 / %arg150 occupy the weight / bias operand slots.
%834 = torch.aten.add.Tensor %833, %result0_39, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%835 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_42, %result1_43, %result2_44 = torch.aten.native_layer_norm %834, %835, %arg151, %arg150, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// ---- Feed-forward sub-layer on %result0_42, producing %result0_45 ----
// NOTE(review): shape remarks assume each %intN constant (defined outside this excerpt) holds
// the value N - TODO confirm against the constant definitions.
// First linear: view as [2048,768], mm with the transpose of %arg161, view as [4,512,3072],
// in-place add of %arg160, then GELU.
%836 = torch.aten.t %arg161 : !torch.tensor -> !torch.tensor
%837 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%838 = torch.aten.view %result0_42, %837 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%839 = torch.aten.mm %838, %836 : !torch.tensor, !torch.tensor -> !torch.tensor
%840 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%841 = torch.operator "aten._unsafe_view"(%839, %840) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%842 = torch.aten.add_.Tensor %841, %arg160, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%843 = torch.aten.gelu %842 : !torch.tensor -> !torch.tensor
// Second linear: view as [2048,3072], mm with the transpose of %arg165, view as [4,512,768],
// in-place add of %arg164.
%844 = torch.aten.t %arg165 : !torch.tensor -> !torch.tensor
%845 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%846 = torch.aten.view %843, %845 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%847 = torch.aten.mm %846, %844 : !torch.tensor, !torch.tensor -> !torch.tensor
%848 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%849 = torch.operator "aten._unsafe_view"(%847, %848) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%850 = torch.aten.add_.Tensor %849, %arg164, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Dropout-style masking: a fresh buffer shaped like %850 is filled in place by bernoulli_ with
// %float9.000000e-01, divided in place by the same constant, and multiplied into %850.
%851 = torch.aten.empty_like %850, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%852 = torch.operator "aten.bernoulli_.float"(%851, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%853 = torch.aten.div_.Scalar %852, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%854 = torch.aten.mul.Tensor %850, %853 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add the sub-layer input %result0_42 back (skip connection), then layer-normalize over the
// trailing [768] dimension; %arg163 / %arg162 occupy the weight / bias operand slots.
%855 = torch.aten.add.Tensor %854, %result0_42, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%856 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_45, %result1_46, %result2_47 = torch.aten.native_layer_norm %855, %856, %arg163, %arg162, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// ---- Attention sub-layer on %result0_45, producing %result0_48 ----
// NOTE(review): %1, %2 and every %int*/%float*/%none/%false constant are defined outside this
// excerpt. Shape remarks assume each %intN constant holds the value N - TODO confirm.
// Projection A: %result0_45 viewed as [2048,768], mm with t(%arg173), view as [4,512,768],
// in-place add of %arg172 -> %863. %863 later feeds the LEFT operand of the score bmm
// (the query role in the usual attention formulation).
%857 = torch.aten.t %arg173 : !torch.tensor -> !torch.tensor
%858 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%859 = torch.aten.view %result0_45, %858 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%860 = torch.aten.mm %859, %857 : !torch.tensor, !torch.tensor -> !torch.tensor
%861 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%862 = torch.operator "aten._unsafe_view"(%860, %861) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%863 = torch.aten.add_.Tensor %862, %arg172, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection B: same pattern with %arg171 / %arg170, then viewed as [4,512,12,64] and permuted
// with order (0,2,1,3) -> %874. %874 is transposed below and feeds the RIGHT operand of the
// score bmm (the key role).
%864 = torch.aten.t %arg171 : !torch.tensor -> !torch.tensor
%865 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%866 = torch.aten.view %result0_45, %865 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%867 = torch.aten.mm %866, %864 : !torch.tensor, !torch.tensor -> !torch.tensor
%868 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%869 = torch.operator "aten._unsafe_view"(%867, %868) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%870 = torch.aten.add_.Tensor %869, %arg170, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%871 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%872 = torch.aten.view %870, %871 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%873 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%874 = torch.aten.permute %872, %873 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection C: same pattern with %arg175 / %arg174 -> %885. %885 is later multiplied by the
// attention probabilities (the value role).
%875 = torch.aten.t %arg175 : !torch.tensor -> !torch.tensor
%876 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%877 = torch.aten.view %result0_45, %876 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%878 = torch.aten.mm %877, %875 : !torch.tensor, !torch.tensor -> !torch.tensor
%879 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%880 = torch.operator "aten._unsafe_view"(%878, %879) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%881 = torch.aten.add_.Tensor %880, %arg174, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%882 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%883 = torch.aten.view %881, %882 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%884 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%885 = torch.aten.permute %883, %884 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Split projection A (%863) the same way: view as [4,512,12,64], permute (0,2,1,3) -> %889.
%886 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%887 = torch.aten.view %863, %886 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%888 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%889 = torch.aten.permute %887, %888 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dims of %874 so the score bmm contracts over the size-64 dimension.
%890 = torch.aten.transpose.int %874, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left bmm operand: %889 expanded, cloned, and viewed as [48,512,64].
%891 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%892 = torch.aten.expand %889, %891, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%893 = torch.operator "aten.clone"(%892, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%894 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%895 = torch.operator "aten._unsafe_view"(%893, %894) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right bmm operand: %890 expanded, cloned, and viewed as [48,64,512].
%896 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%897 = torch.aten.expand %890, %896, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%898 = torch.operator "aten.clone"(%897, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%899 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%900 = torch.operator "aten._unsafe_view"(%898, %899) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Attention scores, viewed as [4,12,512,512].
%901 = torch.aten.bmm %895, %900 : !torch.tensor, !torch.tensor -> !torch.tensor
%902 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%903 = torch.operator "aten._unsafe_view"(%901, %902) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by the 0-d f64 tensor %2, add %1 (typed [4,1,1,512]), softmax along dim %int-1.
// NOTE(review): %1 is presumably the additive attention mask and %2 the score scale - confirm at their definitions.
%904 = torch.aten.div.Tensor %903, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%905 = torch.aten.add.Tensor %904, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%906 = torch.aten._softmax %905, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Dropout-style masking: a fresh buffer shaped like %906 is filled in place by bernoulli_ with
// %float9.000000e-01, divided in place by the same constant, and multiplied into the probabilities.
%907 = torch.aten.empty_like %906, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%908 = torch.operator "aten.bernoulli_.float"(%907, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%909 = torch.aten.div_.Scalar %908, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%910 = torch.aten.mul.Tensor %906, %909 : !torch.tensor, !torch.tensor -> !torch.tensor
// Flatten the probabilities to [48,512,512] and projection C (%885) to [48,512,64].
%911 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%912 = torch.aten.expand %910, %911, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%913 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%914 = torch.aten.view %912, %913 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%915 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%916 = torch.aten.expand %885, %915, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%917 = torch.operator "aten.clone"(%916, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%918 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%919 = torch.operator "aten._unsafe_view"(%917, %918) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Context: probabilities x %919, viewed as [4,12,512,64], permuted (0,2,1,3), cloned, and
// viewed as [4,512,768].
%920 = torch.aten.bmm %914, %919 : !torch.tensor, !torch.tensor -> !torch.tensor
%921 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%922 = torch.operator "aten._unsafe_view"(%920, %921) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%923 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%924 = torch.aten.permute %922, %923 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%925 = torch.operator "aten.clone"(%924, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%926 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%927 = torch.aten.view %925, %926 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: view as [2048,768], mm with t(%arg169), view as [4,512,768], in-place add of %arg168.
%928 = torch.aten.t %arg169 : !torch.tensor -> !torch.tensor
%929 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%930 = torch.aten.view %927, %929 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%931 = torch.aten.mm %930, %928 : !torch.tensor, !torch.tensor -> !torch.tensor
%932 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%933 = torch.operator "aten._unsafe_view"(%931, %932) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%934 = torch.aten.add_.Tensor %933, %arg168, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Same dropout-style masking sequence, applied to the projected output %934.
%935 = torch.aten.empty_like %934, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%936 = torch.operator "aten.bernoulli_.float"(%935, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%937 = torch.aten.div_.Scalar %936, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%938 = torch.aten.mul.Tensor %934, %937 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add the sub-layer input %result0_45 back (skip connection), then layer-normalize over the
// trailing [768] dimension; %arg167 / %arg166 occupy the weight / bias operand slots.
%939 = torch.aten.add.Tensor %938, %result0_45, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%940 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %939, %940, %arg167, %arg166, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// ---- Feed-forward sub-layer on %result0_48, producing %result0_51 ----
// NOTE(review): shape remarks assume each %intN constant (defined outside this excerpt) holds
// the value N - TODO confirm against the constant definitions.
// First linear: view as [2048,768], mm with the transpose of %arg177, view as [4,512,3072],
// in-place add of %arg176, then GELU.
%941 = torch.aten.t %arg177 : !torch.tensor -> !torch.tensor
%942 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%943 = torch.aten.view %result0_48, %942 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%944 = torch.aten.mm %943, %941 : !torch.tensor, !torch.tensor -> !torch.tensor
%945 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%946 = torch.operator "aten._unsafe_view"(%944, %945) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%947 = torch.aten.add_.Tensor %946, %arg176, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%948 = torch.aten.gelu %947 : !torch.tensor -> !torch.tensor
// Second linear: view as [2048,3072], mm with the transpose of %arg181, view as [4,512,768],
// in-place add of %arg180.
%949 = torch.aten.t %arg181 : !torch.tensor -> !torch.tensor
%950 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%951 = torch.aten.view %948, %950 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%952 = torch.aten.mm %951, %949 : !torch.tensor, !torch.tensor -> !torch.tensor
%953 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%954 = torch.operator "aten._unsafe_view"(%952, %953) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%955 = torch.aten.add_.Tensor %954, %arg180, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Dropout-style masking: a fresh buffer shaped like %955 is filled in place by bernoulli_ with
// %float9.000000e-01, divided in place by the same constant, and multiplied into %955.
%956 = torch.aten.empty_like %955, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%957 = torch.operator "aten.bernoulli_.float"(%956, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%958 = torch.aten.div_.Scalar %957, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%959 = torch.aten.mul.Tensor %955, %958 : !torch.tensor, !torch.tensor -> !torch.tensor
// Add the sub-layer input %result0_48 back (skip connection), then layer-normalize over the
// trailing [768] dimension; %arg179 / %arg178 occupy the weight / bias operand slots.
%960 = torch.aten.add.Tensor %959, %result0_48, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%961 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %960, %961, %arg179, %arg178, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// ---- Start of the next attention sub-layer on %result0_51 (continues past this excerpt) ----
// NOTE(review): %1, %2 and every %int*/%false constant are defined outside this excerpt.
// Shape remarks assume each %intN constant holds the value N - TODO confirm.
// Projection A: %result0_51 viewed as [2048,768], mm with t(%arg189), view as [4,512,768],
// in-place add of %arg188 -> %968. %968 later feeds the LEFT operand of the score bmm.
%962 = torch.aten.t %arg189 : !torch.tensor -> !torch.tensor
%963 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%964 = torch.aten.view %result0_51, %963 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%965 = torch.aten.mm %964, %962 : !torch.tensor, !torch.tensor -> !torch.tensor
%966 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%967 = torch.operator "aten._unsafe_view"(%965, %966) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%968 = torch.aten.add_.Tensor %967, %arg188, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection B: same pattern with %arg187 / %arg186, then viewed as [4,512,12,64] and permuted
// with order (0,2,1,3) -> %979. %979 is transposed below and feeds the RIGHT operand of the score bmm.
%969 = torch.aten.t %arg187 : !torch.tensor -> !torch.tensor
%970 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%971 = torch.aten.view %result0_51, %970 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%972 = torch.aten.mm %971, %969 : !torch.tensor, !torch.tensor -> !torch.tensor
%973 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%974 = torch.operator "aten._unsafe_view"(%972, %973) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%975 = torch.aten.add_.Tensor %974, %arg186, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%976 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%977 = torch.aten.view %975, %976 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%978 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%979 = torch.aten.permute %977, %978 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection C: same pattern with %arg191 / %arg190 -> %990.
// NOTE(review): %990 is not consumed within this excerpt; presumably it is the value operand of
// a later bmm, as in the preceding sub-layers - confirm in the lines that follow.
%980 = torch.aten.t %arg191 : !torch.tensor -> !torch.tensor
%981 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%982 = torch.aten.view %result0_51, %981 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%983 = torch.aten.mm %982, %980 : !torch.tensor, !torch.tensor -> !torch.tensor
%984 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%985 = torch.operator "aten._unsafe_view"(%983, %984) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%986 = torch.aten.add_.Tensor %985, %arg190, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%987 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%988 = torch.aten.view %986, %987 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%989 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%990 = torch.aten.permute %988, %989 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Split projection A (%968) the same way: view as [4,512,12,64], permute (0,2,1,3) -> %994.
%991 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%992 = torch.aten.view %968, %991 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%993 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%994 = torch.aten.permute %992, %993 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dims of %979 so the score bmm contracts over the size-64 dimension.
%995 = torch.aten.transpose.int %979, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Left bmm operand: %994 expanded, cloned, and viewed as [48,512,64].
%996 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%997 = torch.aten.expand %994, %996, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%998 = torch.operator "aten.clone"(%997, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%999 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1000 = torch.operator "aten._unsafe_view"(%998, %999) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Right bmm operand: %995 expanded, cloned, and viewed as [48,64,512].
%1001 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1002 = torch.aten.expand %995, %1001, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1003 = torch.operator "aten.clone"(%1002, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1004 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1005 = torch.operator "aten._unsafe_view"(%1003, %1004) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Attention scores, viewed as [4,12,512,512].
%1006 = torch.aten.bmm %1000, %1005 : !torch.tensor, !torch.tensor -> !torch.tensor
%1007 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1008 = torch.operator "aten._unsafe_view"(%1006, %1007) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Divide by the 0-d f64 tensor %2, then add %1 (typed [4,1,1,512]).
// NOTE(review): %1 is presumably the additive attention mask and %2 the score scale - confirm at their definitions.
%1009 = torch.aten.div.Tensor %1008, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1010 = torch.aten.add.Tensor %1009, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1011 = torch.aten._softmax %1010, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1012 = torch.aten.empty_like %1011, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1013 = torch.operator "aten.bernoulli_.float"(%1012, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1014 = torch.aten.div_.Scalar %1013, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1015 = torch.aten.mul.Tensor %1011, %1014 : !torch.tensor, !torch.tensor -> !torch.tensor
%1016 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1017 = torch.aten.expand %1015, %1016, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1018 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1019 = torch.aten.view %1017, %1018 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1020 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1021 = torch.aten.expand %990, %1020, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1022 = torch.operator "aten.clone"(%1021, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1023 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1024 = torch.operator "aten._unsafe_view"(%1022, %1023) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1025 = torch.aten.bmm %1019, %1024 : !torch.tensor, !torch.tensor -> !torch.tensor
%1026 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1027 = torch.operator "aten._unsafe_view"(%1025, %1026) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1028 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1029 = torch.aten.permute %1027, %1028 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1030 = torch.operator "aten.clone"(%1029, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1031 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1032 = torch.aten.view %1030, %1031 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1033 = torch.aten.t %arg185 : !torch.tensor -> !torch.tensor
%1034 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1035 = torch.aten.view %1032, %1034 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1036 = torch.aten.mm %1035, %1033 : !torch.tensor, !torch.tensor -> !torch.tensor
%1037 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1038 = torch.operator "aten._unsafe_view"(%1036, %1037) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1039 = torch.aten.add_.Tensor %1038, %arg184, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1040 = torch.aten.empty_like %1039, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1041 = torch.operator "aten.bernoulli_.float"(%1040, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1042 = torch.aten.div_.Scalar %1041, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1043 = torch.aten.mul.Tensor %1039, %1042 : !torch.tensor, !torch.tensor -> !torch.tensor
%1044 = torch.aten.add.Tensor %1043, %result0_51, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1045 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_54, %result1_55, %result2_56 = torch.aten.native_layer_norm %1044, %1045, %arg183, %arg182, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward sub-block: consumes %result0_54 and produces %result0_57.
// NOTE(review): the %intN / %none / %float operands are defined outside this span;
// remarks about sizes assume each constant holds the value its name suggests -- confirm.
// Step 1: view the input as [%int2048, %int768], multiply by transposed %arg193,
// view as [%int4, %int512, %int3072], add %arg192 in place (presumably the bias), apply GELU.
%1046 = torch.aten.t %arg193 : !torch.tensor -> !torch.tensor
%1047 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1048 = torch.aten.view %result0_54, %1047 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1049 = torch.aten.mm %1048, %1046 : !torch.tensor, !torch.tensor -> !torch.tensor
%1050 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1051 = torch.operator "aten._unsafe_view"(%1049, %1050) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1052 = torch.aten.add_.Tensor %1051, %arg192, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1053 = torch.aten.gelu %1052 : !torch.tensor -> !torch.tensor
// Step 2: view as [%int2048, %int3072], multiply by transposed %arg197,
// view as [%int4, %int512, %int768], add %arg196 in place (presumably the bias).
%1054 = torch.aten.t %arg197 : !torch.tensor -> !torch.tensor
%1055 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1056 = torch.aten.view %1053, %1055 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1057 = torch.aten.mm %1056, %1054 : !torch.tensor, !torch.tensor -> !torch.tensor
%1058 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1059 = torch.operator "aten._unsafe_view"(%1057, %1058) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1060 = torch.aten.add_.Tensor %1059, %arg196, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask: a tensor shaped like %1060 is filled in place by bernoulli_ with
// %float9.000000e-01, divided in place by the same constant, then multiplied into %1060.
// NOTE(review): this matches inverted dropout with keep probability 0.9 -- confirm.
%1061 = torch.aten.empty_like %1060, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1062 = torch.operator "aten.bernoulli_.float"(%1061, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1063 = torch.aten.div_.Scalar %1062, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1064 = torch.aten.mul.Tensor %1060, %1063 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual: add the sub-block input %result0_54, then layer-normalize over [%int768]
// with tensor operands %arg195 and %arg194 (weight and bias positions in the aten signature).
// Only the first of the three results is consumed in the visible code.
%1065 = torch.aten.add.Tensor %1064, %result0_54, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1066 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_57, %result1_58, %result2_59 = torch.aten.native_layer_norm %1065, %1066, %arg195, %arg194, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Attention sub-block: consumes %result0_57 and produces %result0_60.
// NOTE(review): the %intN / %false / %none / %float operands and the tensors %1 and %2 are
// defined outside this span; remarks about sizes assume each %intN holds the value its
// name suggests -- confirm.
// Projection 1 (%arg45, %arg44) -> %1073. It becomes the left operand of the first bmm
// below; presumably the query projection.
%1067 = torch.aten.t %arg45 : !torch.tensor -> !torch.tensor
%1068 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1069 = torch.aten.view %result0_57, %1068 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1070 = torch.aten.mm %1069, %1067 : !torch.tensor, !torch.tensor -> !torch.tensor
%1071 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1072 = torch.operator "aten._unsafe_view"(%1070, %1071) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1073 = torch.aten.add_.Tensor %1072, %arg44, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection 2 (%arg43, %arg42) -> %1080, viewed as [%int4, %int512, %int12, %int64] and
// permuted with order (%int0, %int2, %int1, %int3) -> %1084. Its transpose is the right
// operand of the first bmm; presumably the key projection.
%1074 = torch.aten.t %arg43 : !torch.tensor -> !torch.tensor
%1075 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1076 = torch.aten.view %result0_57, %1075 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1077 = torch.aten.mm %1076, %1074 : !torch.tensor, !torch.tensor -> !torch.tensor
%1078 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1079 = torch.operator "aten._unsafe_view"(%1077, %1078) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1080 = torch.aten.add_.Tensor %1079, %arg42, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1081 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1082 = torch.aten.view %1080, %1081 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1083 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1084 = torch.aten.permute %1082, %1083 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection 3 (%arg47, %arg46) -> %1091, same view + permute -> %1095. It is the right
// operand of the second bmm; presumably the value projection.
%1085 = torch.aten.t %arg47 : !torch.tensor -> !torch.tensor
%1086 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1087 = torch.aten.view %result0_57, %1086 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1088 = torch.aten.mm %1087, %1085 : !torch.tensor, !torch.tensor -> !torch.tensor
%1089 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1090 = torch.operator "aten._unsafe_view"(%1088, %1089) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1091 = torch.aten.add_.Tensor %1090, %arg46, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1092 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1093 = torch.aten.view %1091, %1092 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1094 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1095 = torch.aten.permute %1093, %1094 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Apply the same view + permute to projection 1 (%1073 -> %1099).
%1096 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1097 = torch.aten.view %1073, %1096 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1098 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1099 = torch.aten.permute %1097, %1098 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dimensions of projection 2 so it can be the right-hand bmm operand.
%1100 = torch.aten.transpose.int %1084, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Expand, clone and flatten each operand to three dimensions:
// left [%int48, %int512, %int64], right [%int48, %int64, %int512].
%1101 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1102 = torch.aten.expand %1099, %1101, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1103 = torch.operator "aten.clone"(%1102, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1104 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1105 = torch.operator "aten._unsafe_view"(%1103, %1104) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1106 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1107 = torch.aten.expand %1100, %1106, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1108 = torch.operator "aten.clone"(%1107, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1109 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1110 = torch.operator "aten._unsafe_view"(%1108, %1109) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Scores: batched matmul, view as [%int4, %int12, %int512, %int512], divide by the scalar
// tensor %2, add %1 (typed tensor<[4,1,1,512],f32>; presumably an additive attention mask),
// then softmax along dimension %int-1.
%1111 = torch.aten.bmm %1105, %1110 : !torch.tensor, !torch.tensor -> !torch.tensor
%1112 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1113 = torch.operator "aten._unsafe_view"(%1111, %1112) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1114 = torch.aten.div.Tensor %1113, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1115 = torch.aten.add.Tensor %1114, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1116 = torch.aten._softmax %1115, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask on the softmax output: empty_like -> bernoulli_ with %float9.000000e-01 ->
// in-place divide by the same constant -> multiply into %1116.
// NOTE(review): this matches inverted dropout with keep probability 0.9 -- confirm.
%1117 = torch.aten.empty_like %1116, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1118 = torch.operator "aten.bernoulli_.float"(%1117, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1119 = torch.aten.div_.Scalar %1118, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1120 = torch.aten.mul.Tensor %1116, %1119 : !torch.tensor, !torch.tensor -> !torch.tensor
// Second bmm: masked scores viewed as [%int48, %int512, %int512] times projection 3
// flattened to [%int48, %int512, %int64].
%1121 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1122 = torch.aten.expand %1120, %1121, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1123 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1124 = torch.aten.view %1122, %1123 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1125 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1126 = torch.aten.expand %1095, %1125, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1127 = torch.operator "aten.clone"(%1126, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1128 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1129 = torch.operator "aten._unsafe_view"(%1127, %1128) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1130 = torch.aten.bmm %1124, %1129 : !torch.tensor, !torch.tensor -> !torch.tensor
// Undo the split: view as [%int4, %int12, %int512, %int64], permute with order
// (%int0, %int2, %int1, %int3), clone, view as [%int4, %int512, %int768].
%1131 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1132 = torch.operator "aten._unsafe_view"(%1130, %1131) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1133 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1134 = torch.aten.permute %1132, %1133 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1135 = torch.operator "aten.clone"(%1134, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1136 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1137 = torch.aten.view %1135, %1136 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: multiply by transposed %arg41, add %arg40 in place (presumably the bias).
%1138 = torch.aten.t %arg41 : !torch.tensor -> !torch.tensor
%1139 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1140 = torch.aten.view %1137, %1139 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1141 = torch.aten.mm %1140, %1138 : !torch.tensor, !torch.tensor -> !torch.tensor
%1142 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1143 = torch.operator "aten._unsafe_view"(%1141, %1142) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1144 = torch.aten.add_.Tensor %1143, %arg40, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Same random-mask pattern as above, applied to %1144.
%1145 = torch.aten.empty_like %1144, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1146 = torch.operator "aten.bernoulli_.float"(%1145, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1147 = torch.aten.div_.Scalar %1146, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1148 = torch.aten.mul.Tensor %1144, %1147 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual: add the sub-block input %result0_57, then layer-normalize over [%int768]
// with tensor operands %arg39 and %arg38 (weight and bias positions in the aten signature).
%1149 = torch.aten.add.Tensor %1148, %result0_57, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1150 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_60, %result1_61, %result2_62 = torch.aten.native_layer_norm %1149, %1150, %arg39, %arg38, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward sub-block: consumes %result0_60 and produces %result0_63.
// NOTE(review): the %intN / %none / %float operands are defined outside this span;
// remarks about sizes assume each constant holds the value its name suggests -- confirm.
// Step 1: view the input as [%int2048, %int768], multiply by transposed %arg49,
// view as [%int4, %int512, %int3072], add %arg48 in place (presumably the bias), apply GELU.
%1151 = torch.aten.t %arg49 : !torch.tensor -> !torch.tensor
%1152 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1153 = torch.aten.view %result0_60, %1152 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1154 = torch.aten.mm %1153, %1151 : !torch.tensor, !torch.tensor -> !torch.tensor
%1155 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1156 = torch.operator "aten._unsafe_view"(%1154, %1155) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1157 = torch.aten.add_.Tensor %1156, %arg48, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1158 = torch.aten.gelu %1157 : !torch.tensor -> !torch.tensor
// Step 2: view as [%int2048, %int3072], multiply by transposed %arg53,
// view as [%int4, %int512, %int768], add %arg52 in place (presumably the bias).
%1159 = torch.aten.t %arg53 : !torch.tensor -> !torch.tensor
%1160 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1161 = torch.aten.view %1158, %1160 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1162 = torch.aten.mm %1161, %1159 : !torch.tensor, !torch.tensor -> !torch.tensor
%1163 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1164 = torch.operator "aten._unsafe_view"(%1162, %1163) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1165 = torch.aten.add_.Tensor %1164, %arg52, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask: a tensor shaped like %1165 is filled in place by bernoulli_ with
// %float9.000000e-01, divided in place by the same constant, then multiplied into %1165.
// NOTE(review): this matches inverted dropout with keep probability 0.9 -- confirm.
%1166 = torch.aten.empty_like %1165, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1167 = torch.operator "aten.bernoulli_.float"(%1166, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1168 = torch.aten.div_.Scalar %1167, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1169 = torch.aten.mul.Tensor %1165, %1168 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual: add the sub-block input %result0_60, then layer-normalize over [%int768]
// with tensor operands %arg51 and %arg50 (weight and bias positions in the aten signature).
%1170 = torch.aten.add.Tensor %1169, %result0_60, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1171 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_63, %result1_64, %result2_65 = torch.aten.native_layer_norm %1170, %1171, %arg51, %arg50, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Attention sub-block: consumes %result0_63 and produces %result0_66.
// NOTE(review): the %intN / %false / %none / %float operands and the tensors %1 and %2 are
// defined outside this span; remarks about sizes assume each %intN holds the value its
// name suggests -- confirm.
// Projection 1 (%arg61, %arg60) -> %1178. It becomes the left operand of the first bmm
// below; presumably the query projection.
%1172 = torch.aten.t %arg61 : !torch.tensor -> !torch.tensor
%1173 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1174 = torch.aten.view %result0_63, %1173 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1175 = torch.aten.mm %1174, %1172 : !torch.tensor, !torch.tensor -> !torch.tensor
%1176 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1177 = torch.operator "aten._unsafe_view"(%1175, %1176) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1178 = torch.aten.add_.Tensor %1177, %arg60, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Projection 2 (%arg59, %arg58) -> %1185, viewed as [%int4, %int512, %int12, %int64] and
// permuted with order (%int0, %int2, %int1, %int3) -> %1189. Its transpose is the right
// operand of the first bmm; presumably the key projection.
%1179 = torch.aten.t %arg59 : !torch.tensor -> !torch.tensor
%1180 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1181 = torch.aten.view %result0_63, %1180 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1182 = torch.aten.mm %1181, %1179 : !torch.tensor, !torch.tensor -> !torch.tensor
%1183 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1184 = torch.operator "aten._unsafe_view"(%1182, %1183) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1185 = torch.aten.add_.Tensor %1184, %arg58, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1186 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1187 = torch.aten.view %1185, %1186 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1188 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1189 = torch.aten.permute %1187, %1188 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Projection 3 (%arg63, %arg62) -> %1196, same view + permute -> %1200. It is the right
// operand of the second bmm; presumably the value projection.
%1190 = torch.aten.t %arg63 : !torch.tensor -> !torch.tensor
%1191 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1192 = torch.aten.view %result0_63, %1191 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1193 = torch.aten.mm %1192, %1190 : !torch.tensor, !torch.tensor -> !torch.tensor
%1194 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1195 = torch.operator "aten._unsafe_view"(%1193, %1194) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1196 = torch.aten.add_.Tensor %1195, %arg62, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1197 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1198 = torch.aten.view %1196, %1197 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1199 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1200 = torch.aten.permute %1198, %1199 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Apply the same view + permute to projection 1 (%1178 -> %1204).
%1201 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1202 = torch.aten.view %1178, %1201 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1203 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1204 = torch.aten.permute %1202, %1203 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Swap the last two dimensions of projection 2 so it can be the right-hand bmm operand.
%1205 = torch.aten.transpose.int %1189, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Expand, clone and flatten each operand to three dimensions:
// left [%int48, %int512, %int64], right [%int48, %int64, %int512].
%1206 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1207 = torch.aten.expand %1204, %1206, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1208 = torch.operator "aten.clone"(%1207, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1209 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1210 = torch.operator "aten._unsafe_view"(%1208, %1209) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1211 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1212 = torch.aten.expand %1205, %1211, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1213 = torch.operator "aten.clone"(%1212, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1214 = torch.prim.ListConstruct %int48, %int64, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1215 = torch.operator "aten._unsafe_view"(%1213, %1214) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
// Scores: batched matmul, view as [%int4, %int12, %int512, %int512], divide by the scalar
// tensor %2, add %1 (typed tensor<[4,1,1,512],f32>; presumably an additive attention mask),
// then softmax along dimension %int-1.
%1216 = torch.aten.bmm %1210, %1215 : !torch.tensor, !torch.tensor -> !torch.tensor
%1217 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1218 = torch.operator "aten._unsafe_view"(%1216, %1217) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1219 = torch.aten.div.Tensor %1218, %2 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor
%1220 = torch.aten.add.Tensor %1219, %1, %int1 : !torch.tensor, !torch.tensor<[4,1,1,512],f32>, !torch.int -> !torch.tensor
%1221 = torch.aten._softmax %1220, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
// Random mask on the softmax output: empty_like -> bernoulli_ with %float9.000000e-01 ->
// in-place divide by the same constant -> multiply into %1221.
// NOTE(review): this matches inverted dropout with keep probability 0.9 -- confirm.
%1222 = torch.aten.empty_like %1221, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1223 = torch.operator "aten.bernoulli_.float"(%1222, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1224 = torch.aten.div_.Scalar %1223, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1225 = torch.aten.mul.Tensor %1221, %1224 : !torch.tensor, !torch.tensor -> !torch.tensor
// Second bmm: masked scores viewed as [%int48, %int512, %int512] times projection 3
// flattened to [%int48, %int512, %int64].
%1226 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1227 = torch.aten.expand %1225, %1226, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1228 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1229 = torch.aten.view %1227, %1228 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1230 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1231 = torch.aten.expand %1200, %1230, %false : !torch.tensor, !torch.list<!torch.int>, !torch.bool -> !torch.tensor
%1232 = torch.operator "aten.clone"(%1231, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1233 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1234 = torch.operator "aten._unsafe_view"(%1232, %1233) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1235 = torch.aten.bmm %1229, %1234 : !torch.tensor, !torch.tensor -> !torch.tensor
// Undo the split: view as [%int4, %int12, %int512, %int64], permute with order
// (%int0, %int2, %int1, %int3), clone, view as [%int4, %int512, %int768].
%1236 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1237 = torch.operator "aten._unsafe_view"(%1235, %1236) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1238 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1239 = torch.aten.permute %1237, %1238 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1240 = torch.operator "aten.clone"(%1239, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor
%1241 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1242 = torch.aten.view %1240, %1241 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
// Output projection: multiply by transposed %arg57, add %arg56 in place (presumably the bias).
%1243 = torch.aten.t %arg57 : !torch.tensor -> !torch.tensor
%1244 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1245 = torch.aten.view %1242, %1244 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1246 = torch.aten.mm %1245, %1243 : !torch.tensor, !torch.tensor -> !torch.tensor
%1247 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1248 = torch.operator "aten._unsafe_view"(%1246, %1247) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1249 = torch.aten.add_.Tensor %1248, %arg56, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Same random-mask pattern as above, applied to %1249.
%1250 = torch.aten.empty_like %1249, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1251 = torch.operator "aten.bernoulli_.float"(%1250, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1252 = torch.aten.div_.Scalar %1251, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1253 = torch.aten.mul.Tensor %1249, %1252 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual: add the sub-block input %result0_63, then layer-normalize over [%int768]
// with tensor operands %arg55 and %arg54 (weight and bias positions in the aten signature).
%1254 = torch.aten.add.Tensor %1253, %result0_63, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1255 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_66, %result1_67, %result2_68 = torch.aten.native_layer_norm %1254, %1255, %arg55, %arg54, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
// Feed-forward sub-block: consumes %result0_66 and produces %result0_69.
// NOTE(review): the %intN / %none / %float operands are defined outside this span;
// remarks about sizes assume each constant holds the value its name suggests -- confirm.
// Step 1: view the input as [%int2048, %int768], multiply by transposed %arg65,
// view as [%int4, %int512, %int3072], add %arg64 in place (presumably the bias), apply GELU.
%1256 = torch.aten.t %arg65 : !torch.tensor -> !torch.tensor
%1257 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1258 = torch.aten.view %result0_66, %1257 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1259 = torch.aten.mm %1258, %1256 : !torch.tensor, !torch.tensor -> !torch.tensor
%1260 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1261 = torch.operator "aten._unsafe_view"(%1259, %1260) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1262 = torch.aten.add_.Tensor %1261, %arg64, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1263 = torch.aten.gelu %1262 : !torch.tensor -> !torch.tensor
// Step 2: view as [%int2048, %int3072], multiply by transposed %arg69,
// view as [%int4, %int512, %int768], add %arg68 in place (presumably the bias).
%1264 = torch.aten.t %arg69 : !torch.tensor -> !torch.tensor
%1265 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1266 = torch.aten.view %1263, %1265 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1267 = torch.aten.mm %1266, %1264 : !torch.tensor, !torch.tensor -> !torch.tensor
%1268 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1269 = torch.operator "aten._unsafe_view"(%1267, %1268) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1270 = torch.aten.add_.Tensor %1269, %arg68, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Random mask: a tensor shaped like %1270 is filled in place by bernoulli_ with
// %float9.000000e-01, divided in place by the same constant, then multiplied into %1270.
// NOTE(review): this matches inverted dropout with keep probability 0.9 -- confirm.
%1271 = torch.aten.empty_like %1270, %none, %none, %none, %none, %int0 : !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none, !torch.int -> !torch.tensor
%1272 = torch.operator "aten.bernoulli_.float"(%1271, %float9.000000e-01, %none) : (!torch.tensor, !torch.float, !torch.none) -> !torch.tensor
%1273 = torch.aten.div_.Scalar %1272, %float9.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1274 = torch.aten.mul.Tensor %1270, %1273 : !torch.tensor, !torch.tensor -> !torch.tensor
// Residual: add the sub-block input %result0_66, then layer-normalize over [%int768]
// with tensor operands %arg67 and %arg66 (weight and bias positions in the aten signature).
%1275 = torch.aten.add.Tensor %1274, %result0_66, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1276 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_69, %result1_70, %result2_71 = torch.aten.native_layer_norm %1275, %1276, %arg67, %arg66, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1277 = torch.aten.t %arg204 : !torch.tensor -> !torch.tensor
%1278 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1279 = torch.aten.view %result0_69, %1278 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1280 = torch.aten.mm %1279, %1277 : !torch.tensor, !torch.tensor -> !torch.tensor
%1281 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1282 = torch.operator "aten._unsafe_view"(%1280, %1281) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1283 = torch.aten.add_.Tensor %1282, %arg203, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1284 = torch.aten.gelu %1283 : !torch.tensor -> !torch.tensor
%1285 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int>
%result0_72, %result1_73, %result2_74 = torch.aten.native_layer_norm %1284, %1285, %arg202, %arg201, %float9.999990e-13 : !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1286 = torch.aten.t %arg200 : !torch.tensor -> !torch.tensor
%1287 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1288 = torch.aten.view %result0_72, %1287 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1289 = torch.aten.mm %1288, %1286 : !torch.tensor, !torch.tensor -> !torch.tensor
%1290 = torch.prim.ListConstruct %int4, %int512, %int30522 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int>
%1291 = torch.operator "aten._unsafe_view"(%1289, %1290) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor
%1292 = torch.aten.add_.Tensor %1291, %arg199, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1293 = torch.prim.ListConstruct %int-1, %int30522 : (!torch.int, !torch.int) -> !torch.list<!torch.int>
%1294 = torch.aten.view %1292, %1293 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1295 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<!torch.int>
%1296 = torch.aten.view %arg208, %1295 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor
%1297 = torch.operator "aten._log_softmax"(%1294, %int1, %false) : (!torch.tensor, !torch.int, !torch.bool) -> !torch.tensor
%output, %total_weight = torch.aten.nll_loss_forward %1297, %1296, %none, %int1, %int-100 : !torch.tensor, !torch.tensor, !torch.none, !torch.int, !torch.int -> !torch.tensor, !torch.tensor
%1298 = torch.aten.transpose.int %1229, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1299 = torch.aten.transpose.int %1234, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1300 = torch.aten.transpose.int %1210, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1301 = torch.aten.transpose.int %1215, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1302 = torch.aten.transpose.int %1124, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1303 = torch.aten.transpose.int %1129, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1304 = torch.aten.transpose.int %1105, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1305 = torch.aten.transpose.int %1110, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1306 = torch.aten.transpose.int %1019, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1307 = torch.aten.transpose.int %1024, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1308 = torch.aten.transpose.int %1000, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1309 = torch.aten.transpose.int %1005, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1310 = torch.aten.transpose.int %914, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1311 = torch.aten.transpose.int %919, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1312 = torch.aten.transpose.int %895, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1313 = torch.aten.transpose.int %900, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1314 = torch.aten.transpose.int %809, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1315 = torch.aten.transpose.int %814, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1316 = torch.aten.transpose.int %790, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1317 = torch.aten.transpose.int %795, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1318 = torch.aten.transpose.int %704, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1319 = torch.aten.transpose.int %709, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1320 = torch.aten.transpose.int %685, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1321 = torch.aten.transpose.int %690, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1322 = torch.aten.transpose.int %599, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1323 = torch.aten.transpose.int %604, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1324 = torch.aten.transpose.int %580, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1325 = torch.aten.transpose.int %585, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1326 = torch.aten.transpose.int %494, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1327 = torch.aten.transpose.int %499, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1328 = torch.aten.transpose.int %475, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1329 = torch.aten.transpose.int %480, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1330 = torch.aten.transpose.int %389, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1331 = torch.aten.transpose.int %394, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1332 = torch.aten.transpose.int %370, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1333 = torch.aten.transpose.int %375, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1334 = torch.aten.transpose.int %284, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1335 = torch.aten.transpose.int %289, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1336 = torch.aten.transpose.int %265, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1337 = torch.aten.transpose.int %270, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1338 = torch.aten.transpose.int %179, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1339 = torch.aten.transpose.int %184, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1340 = torch.aten.transpose.int %160, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1341 = torch.aten.transpose.int %165, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1342 = torch.aten.transpose.int %74, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1343 = torch.aten.transpose.int %79, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1344 = torch.aten.transpose.int %55, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1345 = torch.aten.transpose.int %60, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1346 = torch.prim.ListConstruct %output, %1292, %307, %result2_20, %arg99, %result2_50, %1014, %1275, %arg37, %result1_16, %825, %591, %1076, %arg121, %arg38, %446, %result2_5, %arg162, %arg27, %1338, %1245, %result2_11, %result1_7, %arg101, %1224, %arg77, %arg137, %124, %arg95, %1297, %510, %result1_10, %arg117, %321, %594, %733, %arg178, %arg17, %540, %arg134, %arg50, %964, %result1_52, %arg71, %720, %arg41, %arg34, %arg175, %result2_2, %arg61, %arg54, %5, %1116, %204, %arg119, %arg66, %arg201, %result1_37, %arg31, %arg91, %1322, %457, %1330, %result1_46, %1153, %1314, %arg1, %97, %1332, %arg113, %656, %result2_35, %arg67, %arg131, %arg39, %arg195, %arg167, %1221, %334, %26, %1252, %328, %arg150, %result1_34, %517, %1301, %628, %519, %853, %arg189, %result1_4, %result2_56, %1334, %1303, %arg159, %906, %arg45, %279, %439, %212, %result2_41, %1325, %855, %arg153, %1335, %859, %arg82, %877, %489, %arg29, %result2_44, %arg182, %118, %result1_22, %1087, %arg151, %result1_49, %381, %arg146, %1316, %arg141, %1308, %6, %result2_47, %arg193, %947, %1324, %arg93, %622, %arg22, %1326, %arg43, %arg187, %result2_53, %615, %1340, %216, %982, %result2_23, %result1_28, %15, %arg139, %arg194, %arg155, %arg204, %66, %result1_43, %arg47, %667, %1056, %1310, %arg147, %result1_73, %1157, %1329, %arg183, %arg181, %1319, %741, %1300, %384, %1328, %804, %arg105, %1323, %69, %195, %1052, %1266, %1140, %arg18, %result2_38, %223, %300, %arg107, %645, %208, %result1_70, %832, %433, %171, %arg86, %939, %958, %1344, %result1, %562, %1304, %arg75, %1069, %90, %486, %arg65, %arg191, %418, %1147, %313, %arg161, %arg125, %1339, %result1_13, %result2_68, %arg165, %arg19, %426, %1342, %971, %arg69, %result2_8, %arg102, %result2_17, %arg2, %943, %1279, %arg89, %276, %412, %435, %1313, %arg129, %1315, %arg81, %750, %arg63, %1011, %1161, %1273, %1174, %1299, %arg111, %result1_55, %544, %801, %arg185, %930, %arg179, %arg114, %arg49, %699, %1288, %result1_31, %1309, %1044, %1048, %1262, %1305, %19, 
%result2_26, %arg98, %1306, %arg15, %937, %696, %arg85, %229, %761, %174, %result2_71, %arg87, %arg135, %643, %909, %result2_65, %1336, %arg123, %arg55, %arg59, %total_weight, %772, %1333, %result1_67, %1318, %arg130, %1168, %arg133, %247, %754, %arg97, %result1_64, %result2, %1331, %result1_40, %842, %arg21, %result1_25, %1321, %330, %737, %result2_14, %result1_58, %result2_29, %1149, %99, %352, %arg11, %142, %1254, %309, %arg25, %551, %arg109, %1345, %1258, %1343, %arg70, %arg202, %arg207, %414, %arg127, %834, %arg33, %arg57, %846, %624, %538, %arg143, %result1_19, %1302, %result1_1, %202, %1311, %1341, %1327, %37, %arg79, %arg197, %1337, %11, %result2_32, %1312, %649, %422, %236, %727, %arg149, %arg200, %arg177, %arg169, %1063, %1296, %1320, %arg7, %arg163, %951, %1297, %729, %960, %result2_62, %341, %1119, %result2_74, %arg118, %1283, %120, %arg115, %1181, %arg13, %107, %arg73, %1298, %arg53, %1170, %result2_59, %result1_61, %arg51, %arg173, %arg9, %632, %866, %arg145, %131, %arg35, %1065, %636, %748, %405, %1042, %arg83, %1192, %527, %103, %838, %225, %arg171, %1035, %531, %arg166, %arg23, %arg103, %arg6, %111, %1317, %1307, %arg157, %317, %523 : (!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor) -> !torch.list<!torch.tensor>
return %1346 : !torch.list<!torch.tensor>
}
// Class type declaration for the imported GraphModule.
// It declares no attributes; its only member is a method named "forward",
// bound to the private function symbol
// @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule.forward
// defined earlier in this module.
torch.class_type @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule {
torch.method "forward", @__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule.forward
}
// Module object whose result type names the GraphModule class
// ("__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule").
// The region is empty: no slots are populated, which matches the class type
// declaring a method but no attributes.
%0 = torch.nn_module {
} : !torch.nn.Module<"__torch__.torch.fx.graph_module.___torch_mangle_0.GraphModule">
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment