Skip to content

Instantly share code, notes, and snippets.

@vivekkhandelwal1
Created January 25, 2023 13:33
Show Gist options
  • Save vivekkhandelwal1/ffba6fbbfa72c44e79639bec253d0910 to your computer and use it in GitHub Desktop.
module attributes {torch.debug_module_name = "GraphModule"} {
func.func @forward(%arg0: !torch.vtensor<[128,3,3,3],f32>, %arg1: !torch.vtensor<[128],f32>, %arg2: !torch.vtensor<[128],f32>, %arg3: !torch.vtensor<[128],f32>, %arg4: !torch.vtensor<[128,128,3,3],f32>, %arg5: !torch.vtensor<[128],f32>, %arg6: !torch.vtensor<[128],f32>, %arg7: !torch.vtensor<[128],f32>, %arg8: !torch.vtensor<[128,128,3,3],f32>, %arg9: !torch.vtensor<[128],f32>, %arg10: !torch.vtensor<[128],f32>, %arg11: !torch.vtensor<[128],f32>, %arg12: !torch.vtensor<[128,128,3,3],f32>, %arg13: !torch.vtensor<[128],f32>, %arg14: !torch.vtensor<[128],f32>, %arg15: !torch.vtensor<[128],f32>, %arg16: !torch.vtensor<[128,128,3,3],f32>, %arg17: !torch.vtensor<[128],f32>, %arg18: !torch.vtensor<[128,128,3,3],f32>, %arg19: !torch.vtensor<[128],f32>, %arg20: !torch.vtensor<[128],f32>, %arg21: !torch.vtensor<[128],f32>, %arg22: !torch.vtensor<[256,128,3,3],f32>, %arg23: !torch.vtensor<[256],f32>, %arg24: !torch.vtensor<[256],f32>, %arg25: !torch.vtensor<[256],f32>, %arg26: !torch.vtensor<[256,256,3,3],f32>, %arg27: !torch.vtensor<[256],f32>, %arg28: !torch.vtensor<[256,128,1,1],f32>, %arg29: !torch.vtensor<[256],f32>, %arg30: !torch.vtensor<[256],f32>, %arg31: !torch.vtensor<[256],f32>, %arg32: !torch.vtensor<[256,256,3,3],f32>, %arg33: !torch.vtensor<[256],f32>, %arg34: !torch.vtensor<[256],f32>, %arg35: !torch.vtensor<[256],f32>, %arg36: !torch.vtensor<[256,256,3,3],f32>, %arg37: !torch.vtensor<[256],f32>, %arg38: !torch.vtensor<[256,256,3,3],f32>, %arg39: !torch.vtensor<[256],f32>, %arg40: !torch.vtensor<[256],f32>, %arg41: !torch.vtensor<[256],f32>, %arg42: !torch.vtensor<[512,256,3,3],f32>, %arg43: !torch.vtensor<[512],f32>, %arg44: !torch.vtensor<[512],f32>, %arg45: !torch.vtensor<[512],f32>, %arg46: !torch.vtensor<[512,512,3,3],f32>, %arg47: !torch.vtensor<[512],f32>, %arg48: !torch.vtensor<[512,256,1,1],f32>, %arg49: !torch.vtensor<[512],f32>, %arg50: !torch.vtensor<[512],f32>, %arg51: !torch.vtensor<[512],f32>, %arg52: !torch.vtensor<[512,512,3,3],f32>, %arg53: 
!torch.vtensor<[512],f32>, %arg54: !torch.vtensor<[512],f32>, %arg55: !torch.vtensor<[512],f32>, %arg56: !torch.vtensor<[512,512,3,3],f32>, %arg57: !torch.vtensor<[512],f32>, %arg58: !torch.vtensor<[512,512,3,3],f32>, %arg59: !torch.vtensor<[512],f32>, %arg60: !torch.vtensor<[512],f32>, %arg61: !torch.vtensor<[512],f32>, %arg62: !torch.vtensor<[512,512,3,3],f32>, %arg63: !torch.vtensor<[512],f32>, %arg64: !torch.vtensor<[512],f32>, %arg65: !torch.vtensor<[512],f32>, %arg66: !torch.vtensor<[512,512,3,3],f32>, %arg67: !torch.vtensor<[512],f32>, %arg68: !torch.vtensor<[512],f32>, %arg69: !torch.vtensor<[512],f32>, %arg70: !torch.vtensor<[512,512,3,3],f32>, %arg71: !torch.vtensor<[512],f32>, %arg72: !torch.vtensor<[512],f32>, %arg73: !torch.vtensor<[512],f32>, %arg74: !torch.vtensor<[512,512,3,3],f32>, %arg75: !torch.vtensor<[512],f32>, %arg76: !torch.vtensor<[512],f32>, %arg77: !torch.vtensor<[512],f32>, %arg78: !torch.vtensor<[512,512,3,3],f32>, %arg79: !torch.vtensor<[512],f32>, %arg80: !torch.vtensor<[512],f32>, %arg81: !torch.vtensor<[512],f32>, %arg82: !torch.vtensor<[512,512,3,3],f32>, %arg83: !torch.vtensor<[512],f32>, %arg84: !torch.vtensor<[512],f32>, %arg85: !torch.vtensor<[512],f32>, %arg86: !torch.vtensor<[512,512],f32>, %arg87: !torch.vtensor<[512],f32>, %arg88: !torch.vtensor<[512,512],f32>, %arg89: !torch.vtensor<[512],f32>, %arg90: !torch.vtensor<[512,512],f32>, %arg91: !torch.vtensor<[512],f32>, %arg92: !torch.vtensor<[512,512],f32>, %arg93: !torch.vtensor<[512],f32>, %arg94: !torch.vtensor<[512],f32>, %arg95: !torch.vtensor<[512],f32>, %arg96: !torch.vtensor<[512,512,3,3],f32>, %arg97: !torch.vtensor<[512],f32>, %arg98: !torch.vtensor<[512],f32>, %arg99: !torch.vtensor<[512],f32>, %arg100: !torch.vtensor<[512,512,3,3],f32>, %arg101: !torch.vtensor<[512],f32>, %arg102: !torch.vtensor<[512],f32>, %arg103: !torch.vtensor<[512],f32>, %arg104: !torch.vtensor<[8,512,3,3],f32>, %arg105: !torch.vtensor<[8],f32>, %arg106: !torch.vtensor<[8,8,1,1],f32>, 
%arg107: !torch.vtensor<[8],f32>, %arg108: !torch.vtensor<[49409,768],f32>, %arg109: !torch.vtensor<[77,768],f32>, %arg110: !torch.vtensor<[768],f32>, %arg111: !torch.vtensor<[768],f32>, %arg112: !torch.vtensor<[768,768],f32>, %arg113: !torch.vtensor<[768],f32>, %arg114: !torch.vtensor<[768,768],f32>, %arg115: !torch.vtensor<[768],f32>, %arg116: !torch.vtensor<[768,768],f32>, %arg117: !torch.vtensor<[768],f32>, %arg118: !torch.vtensor<[768,768],f32>, %arg119: !torch.vtensor<[768],f32>, %arg120: !torch.vtensor<[768],f32>, %arg121: !torch.vtensor<[768],f32>, %arg122: !torch.vtensor<[3072,768],f32>, %arg123: !torch.vtensor<[3072],f32>, %arg124: !torch.vtensor<[768,3072],f32>, %arg125: !torch.vtensor<[768],f32>, %arg126: !torch.vtensor<[768],f32>, %arg127: !torch.vtensor<[768],f32>, %arg128: !torch.vtensor<[768,768],f32>, %arg129: !torch.vtensor<[768],f32>, %arg130: !torch.vtensor<[768,768],f32>, %arg131: !torch.vtensor<[768],f32>, %arg132: !torch.vtensor<[768,768],f32>, %arg133: !torch.vtensor<[768],f32>, %arg134: !torch.vtensor<[768,768],f32>, %arg135: !torch.vtensor<[768],f32>, %arg136: !torch.vtensor<[768],f32>, %arg137: !torch.vtensor<[768],f32>, %arg138: !torch.vtensor<[3072,768],f32>, %arg139: !torch.vtensor<[3072],f32>, %arg140: !torch.vtensor<[768,3072],f32>, %arg141: !torch.vtensor<[768],f32>, %arg142: !torch.vtensor<[768],f32>, %arg143: !torch.vtensor<[768],f32>, %arg144: !torch.vtensor<[768,768],f32>, %arg145: !torch.vtensor<[768],f32>, %arg146: !torch.vtensor<[768,768],f32>, %arg147: !torch.vtensor<[768],f32>, %arg148: !torch.vtensor<[768,768],f32>, %arg149: !torch.vtensor<[768],f32>, %arg150: !torch.vtensor<[768,768],f32>, %arg151: !torch.vtensor<[768],f32>, %arg152: !torch.vtensor<[768],f32>, %arg153: !torch.vtensor<[768],f32>, %arg154: !torch.vtensor<[3072,768],f32>, %arg155: !torch.vtensor<[3072],f32>, %arg156: !torch.vtensor<[768,3072],f32>, %arg157: !torch.vtensor<[768],f32>, %arg158: !torch.vtensor<[768],f32>, %arg159: !torch.vtensor<[768],f32>, 
%arg160: !torch.vtensor<[768,768],f32>, %arg161: !torch.vtensor<[768],f32>, %arg162: !torch.vtensor<[768,768],f32>, %arg163: !torch.vtensor<[768],f32>, %arg164: !torch.vtensor<[768,768],f32>, %arg165: !torch.vtensor<[768],f32>, %arg166: !torch.vtensor<[768,768],f32>, %arg167: !torch.vtensor<[768],f32>, %arg168: !torch.vtensor<[768],f32>, %arg169: !torch.vtensor<[768],f32>, %arg170: !torch.vtensor<[3072,768],f32>, %arg171: !torch.vtensor<[3072],f32>, %arg172: !torch.vtensor<[768,3072],f32>, %arg173: !torch.vtensor<[768],f32>, %arg174: !torch.vtensor<[768],f32>, %arg175: !torch.vtensor<[768],f32>, %arg176: !torch.vtensor<[768,768],f32>, %arg177: !torch.vtensor<[768],f32>, %arg178: !torch.vtensor<[768,768],f32>, %arg179: !torch.vtensor<[768],f32>, %arg180: !torch.vtensor<[768,768],f32>, %arg181: !torch.vtensor<[768],f32>, %arg182: !torch.vtensor<[768,768],f32>, %arg183: !torch.vtensor<[768],f32>, %arg184: !torch.vtensor<[768],f32>, %arg185: !torch.vtensor<[768],f32>, %arg186: !torch.vtensor<[3072,768],f32>, %arg187: !torch.vtensor<[3072],f32>, %arg188: !torch.vtensor<[768,3072],f32>, %arg189: !torch.vtensor<[768],f32>, %arg190: !torch.vtensor<[768],f32>, %arg191: !torch.vtensor<[768],f32>, %arg192: !torch.vtensor<[768,768],f32>, %arg193: !torch.vtensor<[768],f32>, %arg194: !torch.vtensor<[768,768],f32>, %arg195: !torch.vtensor<[768],f32>, %arg196: !torch.vtensor<[768,768],f32>, %arg197: !torch.vtensor<[768],f32>, %arg198: !torch.vtensor<[768,768],f32>, %arg199: !torch.vtensor<[768],f32>, %arg200: !torch.vtensor<[768],f32>, %arg201: !torch.vtensor<[768],f32>, %arg202: !torch.vtensor<[3072,768],f32>, %arg203: !torch.vtensor<[3072],f32>, %arg204: !torch.vtensor<[768,3072],f32>, %arg205: !torch.vtensor<[768],f32>, %arg206: !torch.vtensor<[768],f32>, %arg207: !torch.vtensor<[768],f32>, %arg208: !torch.vtensor<[768,768],f32>, %arg209: !torch.vtensor<[768],f32>, %arg210: !torch.vtensor<[768,768],f32>, %arg211: !torch.vtensor<[768],f32>, %arg212: 
!torch.vtensor<[768,768],f32>, %arg213: !torch.vtensor<[768],f32>, %arg214: !torch.vtensor<[768,768],f32>, %arg215: !torch.vtensor<[768],f32>, %arg216: !torch.vtensor<[768],f32>, %arg217: !torch.vtensor<[768],f32>, %arg218: !torch.vtensor<[3072,768],f32>, %arg219: !torch.vtensor<[3072],f32>, %arg220: !torch.vtensor<[768,3072],f32>, %arg221: !torch.vtensor<[768],f32>, %arg222: !torch.vtensor<[768],f32>, %arg223: !torch.vtensor<[768],f32>, %arg224: !torch.vtensor<[768,768],f32>, %arg225: !torch.vtensor<[768],f32>, %arg226: !torch.vtensor<[768,768],f32>, %arg227: !torch.vtensor<[768],f32>, %arg228: !torch.vtensor<[768,768],f32>, %arg229: !torch.vtensor<[768],f32>, %arg230: !torch.vtensor<[768,768],f32>, %arg231: !torch.vtensor<[768],f32>, %arg232: !torch.vtensor<[768],f32>, %arg233: !torch.vtensor<[768],f32>, %arg234: !torch.vtensor<[3072,768],f32>, %arg235: !torch.vtensor<[3072],f32>, %arg236: !torch.vtensor<[768,3072],f32>, %arg237: !torch.vtensor<[768],f32>, %arg238: !torch.vtensor<[768],f32>, %arg239: !torch.vtensor<[768],f32>, %arg240: !torch.vtensor<[768,768],f32>, %arg241: !torch.vtensor<[768],f32>, %arg242: !torch.vtensor<[768,768],f32>, %arg243: !torch.vtensor<[768],f32>, %arg244: !torch.vtensor<[768,768],f32>, %arg245: !torch.vtensor<[768],f32>, %arg246: !torch.vtensor<[768,768],f32>, %arg247: !torch.vtensor<[768],f32>, %arg248: !torch.vtensor<[768],f32>, %arg249: !torch.vtensor<[768],f32>, %arg250: !torch.vtensor<[3072,768],f32>, %arg251: !torch.vtensor<[3072],f32>, %arg252: !torch.vtensor<[768,3072],f32>, %arg253: !torch.vtensor<[768],f32>, %arg254: !torch.vtensor<[768],f32>, %arg255: !torch.vtensor<[768],f32>, %arg256: !torch.vtensor<[768,768],f32>, %arg257: !torch.vtensor<[768],f32>, %arg258: !torch.vtensor<[768,768],f32>, %arg259: !torch.vtensor<[768],f32>, %arg260: !torch.vtensor<[768,768],f32>, %arg261: !torch.vtensor<[768],f32>, %arg262: !torch.vtensor<[768,768],f32>, %arg263: !torch.vtensor<[768],f32>, %arg264: !torch.vtensor<[768],f32>, %arg265: 
!torch.vtensor<[768],f32>, %arg266: !torch.vtensor<[3072,768],f32>, %arg267: !torch.vtensor<[3072],f32>, %arg268: !torch.vtensor<[768,3072],f32>, %arg269: !torch.vtensor<[768],f32>, %arg270: !torch.vtensor<[768],f32>, %arg271: !torch.vtensor<[768],f32>, %arg272: !torch.vtensor<[768,768],f32>, %arg273: !torch.vtensor<[768],f32>, %arg274: !torch.vtensor<[768,768],f32>, %arg275: !torch.vtensor<[768],f32>, %arg276: !torch.vtensor<[768,768],f32>, %arg277: !torch.vtensor<[768],f32>, %arg278: !torch.vtensor<[768,768],f32>, %arg279: !torch.vtensor<[768],f32>, %arg280: !torch.vtensor<[768],f32>, %arg281: !torch.vtensor<[768],f32>, %arg282: !torch.vtensor<[3072,768],f32>, %arg283: !torch.vtensor<[3072],f32>, %arg284: !torch.vtensor<[768,3072],f32>, %arg285: !torch.vtensor<[768],f32>, %arg286: !torch.vtensor<[768],f32>, %arg287: !torch.vtensor<[768],f32>, %arg288: !torch.vtensor<[768,768],f32>, %arg289: !torch.vtensor<[768],f32>, %arg290: !torch.vtensor<[768,768],f32>, %arg291: !torch.vtensor<[768],f32>, %arg292: !torch.vtensor<[768,768],f32>, %arg293: !torch.vtensor<[768],f32>, %arg294: !torch.vtensor<[768,768],f32>, %arg295: !torch.vtensor<[768],f32>, %arg296: !torch.vtensor<[768],f32>, %arg297: !torch.vtensor<[768],f32>, %arg298: !torch.vtensor<[3072,768],f32>, %arg299: !torch.vtensor<[3072],f32>, %arg300: !torch.vtensor<[768,3072],f32>, %arg301: !torch.vtensor<[768],f32>, %arg302: !torch.vtensor<[768],f32>, %arg303: !torch.vtensor<[768],f32>, %arg304: !torch.vtensor<[1,77],si64>, %arg305: !torch.vtensor<[1,77],si64>, %arg306: !torch.vtensor<[1,3,512,512],f32>, %arg307: !torch.vtensor<[1000],f32>) -> (!torch.vtensor<[1,4,64,64],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,4,64,64],f32>, !torch.vtensor<[1000],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,64],f32>, 
!torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77],si64>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, 
!torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,64],f32>, 
!torch.vtensor<[768,3072],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, 
!torch.vtensor<[12,77,64],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,768],f32>) {
%int2 = torch.constant.int 2
%int1 = torch.constant.int 1
%true = torch.constant.bool true
%none = torch.constant.none
%int768 = torch.constant.int 768
%int77 = torch.constant.int 77
%int0 = torch.constant.int 0
%float1.000000e00 = torch.constant.float 1.000000e+00
%false = torch.constant.bool false
%float0.000000e00 = torch.constant.float 0.000000e+00
%float1.000000e03 = torch.constant.float 1.000000e+03
%int4 = torch.constant.int 4
%float-2.000000e00 = torch.constant.float -2.000000e+00
%float6.283180e00 = torch.constant.float 6.283180e+00
%int7 = torch.constant.int 7
%int65536 = torch.constant.int 65536
%int16 = torch.constant.int 16
%int4096 = torch.constant.int 4096
%int6 = torch.constant.int 6
%int262144 = torch.constant.int 262144
%int16384 = torch.constant.int 16384
%int131072 = torch.constant.int 131072
%int8 = torch.constant.int 8
%int524288 = torch.constant.int 524288
%int1048576 = torch.constant.int 1048576
%0 = torch.vtensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.vtensor<[],f32>
%int-1 = torch.constant.int -1
%int-2 = torch.constant.int -2
%float-3.000000e01 = torch.constant.float -3.000000e+01
%int32 = torch.constant.int 32
%int3 = torch.constant.int 3
%float9.999990e-07 = torch.constant.float 9.9999999999999995E-7
%int128 = torch.constant.int 128
%int512 = torch.constant.int 512
%int256 = torch.constant.int 256
%int64 = torch.constant.int 64
%float4.419420e-02 = torch.constant.float 0.044194173824159216
%float2.000000e01 = torch.constant.float 2.000000e+01
%float5.000000e-01 = torch.constant.float 5.000000e-01
%float1.821500e-01 = torch.constant.float 1.821500e-01
%float1.000000e-05 = torch.constant.float 1.000000e-05
%float1.250000e-01 = torch.constant.float 1.250000e-01
%int12 = torch.constant.int 12
%int3072 = torch.constant.int 3072
%float1.702000e00 = torch.constant.float 1.702000e+00
%cpu = torch.constant.device "cpu"
%1 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%2 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
%3 = torch.aten.convolution %arg306, %arg0, %arg1, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,3,512,512],f32>, !torch.vtensor<[128,3,3,3],f32>, !torch.vtensor<[128],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,128,512,512],f32>
%4 = torch.prim.ListConstruct %int1, %int32, %int4, %int262144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%5 = torch.aten.view %3, %4 : !torch.vtensor<[1,128,512,512],f32>, !torch.list<int> -> !torch.vtensor<[1,32,4,262144],f32>
%6 = torch.prim.ListConstruct %int2, %int3 : (!torch.int, !torch.int) -> !torch.list<int>
%7 = torch.aten.to.dtype %5, %int7, %false, %false, %none : !torch.vtensor<[1,32,4,262144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,4,262144],f64>
%8 = torch.aten.sum.dim_IntList %7, %6, %true, %none : !torch.vtensor<[1,32,4,262144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%9 = torch.aten.div.Scalar %8, %int1048576 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%10 = torch.aten.sub.Tensor %7, %9, %float1.000000e00 : !torch.vtensor<[1,32,4,262144],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,4,262144],f64>
%11 = torch.aten.mul.Tensor %10, %10 : !torch.vtensor<[1,32,4,262144],f64>, !torch.vtensor<[1,32,4,262144],f64> -> !torch.vtensor<[1,32,4,262144],f64>
%12 = torch.aten.sum.dim_IntList %11, %6, %true, %none : !torch.vtensor<[1,32,4,262144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%13 = torch.aten.div.Scalar %12, %int1048576 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%14 = torch.aten.to.dtype %13, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%15 = torch.aten.sum.dim_IntList %5, %6, %true, %none : !torch.vtensor<[1,32,4,262144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%16 = torch.aten.div.Scalar %15, %int1048576 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%17 = torch.aten.add.Scalar %14, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%18 = torch.aten.rsqrt %17 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%19 = torch.aten.sub.Tensor %5, %16, %int1 : !torch.vtensor<[1,32,4,262144],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,4,262144],f32>
%20 = torch.aten.mul.Tensor %19, %18 : !torch.vtensor<[1,32,4,262144],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,4,262144],f32>
%21 = torch.prim.ListConstruct %int1, %int128, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22 = torch.aten.view %20, %21 : !torch.vtensor<[1,32,4,262144],f32>, !torch.list<int> -> !torch.vtensor<[1,128,512,512],f32>
%23 = torch.aten.unsqueeze %arg3, %int0 : !torch.vtensor<[128],f32>, !torch.int -> !torch.vtensor<[1,128],f32>
%24 = torch.aten.unsqueeze %23, %int2 : !torch.vtensor<[1,128],f32>, !torch.int -> !torch.vtensor<[1,128,1],f32>
%25 = torch.aten.unsqueeze %24, %int3 : !torch.vtensor<[1,128,1],f32>, !torch.int -> !torch.vtensor<[1,128,1,1],f32>
%26 = torch.aten.unsqueeze %arg2, %int0 : !torch.vtensor<[128],f32>, !torch.int -> !torch.vtensor<[1,128],f32>
%27 = torch.aten.unsqueeze %26, %int2 : !torch.vtensor<[1,128],f32>, !torch.int -> !torch.vtensor<[1,128,1],f32>
%28 = torch.aten.unsqueeze %27, %int3 : !torch.vtensor<[1,128,1],f32>, !torch.int -> !torch.vtensor<[1,128,1,1],f32>
%29 = torch.aten.mul.Tensor %22, %28 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,1,1],f32> -> !torch.vtensor<[1,128,512,512],f32>
%30 = torch.aten.add.Tensor %29, %25, %int1 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,1,1],f32>, !torch.int -> !torch.vtensor<[1,128,512,512],f32>
%31 = torch.aten.sigmoid %30 : !torch.vtensor<[1,128,512,512],f32> -> !torch.vtensor<[1,128,512,512],f32>
%32 = torch.aten.mul.Tensor %31, %30 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,512,512],f32> -> !torch.vtensor<[1,128,512,512],f32>
%33 = torch.aten.convolution %32, %arg4, %arg5, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[128,128,3,3],f32>, !torch.vtensor<[128],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,128,512,512],f32>
%34 = torch.aten.view %33, %4 : !torch.vtensor<[1,128,512,512],f32>, !torch.list<int> -> !torch.vtensor<[1,32,4,262144],f32>
%35 = torch.aten.to.dtype %34, %int7, %false, %false, %none : !torch.vtensor<[1,32,4,262144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,4,262144],f64>
%36 = torch.aten.sum.dim_IntList %35, %6, %true, %none : !torch.vtensor<[1,32,4,262144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%37 = torch.aten.div.Scalar %36, %int1048576 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%38 = torch.aten.sub.Tensor %35, %37, %float1.000000e00 : !torch.vtensor<[1,32,4,262144],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,4,262144],f64>
%39 = torch.aten.mul.Tensor %38, %38 : !torch.vtensor<[1,32,4,262144],f64>, !torch.vtensor<[1,32,4,262144],f64> -> !torch.vtensor<[1,32,4,262144],f64>
%40 = torch.aten.sum.dim_IntList %39, %6, %true, %none : !torch.vtensor<[1,32,4,262144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%41 = torch.aten.div.Scalar %40, %int1048576 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%42 = torch.aten.to.dtype %41, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%43 = torch.aten.sum.dim_IntList %34, %6, %true, %none : !torch.vtensor<[1,32,4,262144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%44 = torch.aten.div.Scalar %43, %int1048576 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%45 = torch.aten.add.Scalar %42, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%46 = torch.aten.rsqrt %45 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%47 = torch.aten.sub.Tensor %34, %44, %int1 : !torch.vtensor<[1,32,4,262144],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,4,262144],f32>
%48 = torch.aten.mul.Tensor %47, %46 : !torch.vtensor<[1,32,4,262144],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,4,262144],f32>
%49 = torch.aten.view %48, %21 : !torch.vtensor<[1,32,4,262144],f32>, !torch.list<int> -> !torch.vtensor<[1,128,512,512],f32>
%50 = torch.aten.unsqueeze %arg7, %int0 : !torch.vtensor<[128],f32>, !torch.int -> !torch.vtensor<[1,128],f32>
%51 = torch.aten.unsqueeze %50, %int2 : !torch.vtensor<[1,128],f32>, !torch.int -> !torch.vtensor<[1,128,1],f32>
%52 = torch.aten.unsqueeze %51, %int3 : !torch.vtensor<[1,128,1],f32>, !torch.int -> !torch.vtensor<[1,128,1,1],f32>
%53 = torch.aten.unsqueeze %arg6, %int0 : !torch.vtensor<[128],f32>, !torch.int -> !torch.vtensor<[1,128],f32>
%54 = torch.aten.unsqueeze %53, %int2 : !torch.vtensor<[1,128],f32>, !torch.int -> !torch.vtensor<[1,128,1],f32>
%55 = torch.aten.unsqueeze %54, %int3 : !torch.vtensor<[1,128,1],f32>, !torch.int -> !torch.vtensor<[1,128,1,1],f32>
%56 = torch.aten.mul.Tensor %49, %55 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,1,1],f32> -> !torch.vtensor<[1,128,512,512],f32>
%57 = torch.aten.add.Tensor %56, %52, %int1 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,1,1],f32>, !torch.int -> !torch.vtensor<[1,128,512,512],f32>
%58 = torch.aten.sigmoid %57 : !torch.vtensor<[1,128,512,512],f32> -> !torch.vtensor<[1,128,512,512],f32>
%59 = torch.aten.mul.Tensor %58, %57 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,512,512],f32> -> !torch.vtensor<[1,128,512,512],f32>
%60 = torch.aten.convolution %59, %arg8, %arg9, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[128,128,3,3],f32>, !torch.vtensor<[128],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,128,512,512],f32>
%61 = torch.aten.add.Tensor %3, %60, %int1 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,512,512],f32>, !torch.int -> !torch.vtensor<[1,128,512,512],f32>
%62 = torch.aten.div.Scalar %61, %float1.000000e00 : !torch.vtensor<[1,128,512,512],f32>, !torch.float -> !torch.vtensor<[1,128,512,512],f32>
%63 = torch.aten.view %62, %4 : !torch.vtensor<[1,128,512,512],f32>, !torch.list<int> -> !torch.vtensor<[1,32,4,262144],f32>
%64 = torch.aten.to.dtype %63, %int7, %false, %false, %none : !torch.vtensor<[1,32,4,262144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,4,262144],f64>
%65 = torch.aten.sum.dim_IntList %64, %6, %true, %none : !torch.vtensor<[1,32,4,262144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%66 = torch.aten.div.Scalar %65, %int1048576 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%67 = torch.aten.sub.Tensor %64, %66, %float1.000000e00 : !torch.vtensor<[1,32,4,262144],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,4,262144],f64>
%68 = torch.aten.mul.Tensor %67, %67 : !torch.vtensor<[1,32,4,262144],f64>, !torch.vtensor<[1,32,4,262144],f64> -> !torch.vtensor<[1,32,4,262144],f64>
%69 = torch.aten.sum.dim_IntList %68, %6, %true, %none : !torch.vtensor<[1,32,4,262144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%70 = torch.aten.div.Scalar %69, %int1048576 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%71 = torch.aten.to.dtype %70, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%72 = torch.aten.sum.dim_IntList %63, %6, %true, %none : !torch.vtensor<[1,32,4,262144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%73 = torch.aten.div.Scalar %72, %int1048576 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%74 = torch.aten.add.Scalar %71, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%75 = torch.aten.rsqrt %74 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%76 = torch.aten.sub.Tensor %63, %73, %int1 : !torch.vtensor<[1,32,4,262144],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,4,262144],f32>
%77 = torch.aten.mul.Tensor %76, %75 : !torch.vtensor<[1,32,4,262144],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,4,262144],f32>
%78 = torch.aten.view %77, %21 : !torch.vtensor<[1,32,4,262144],f32>, !torch.list<int> -> !torch.vtensor<[1,128,512,512],f32>
%79 = torch.aten.unsqueeze %arg11, %int0 : !torch.vtensor<[128],f32>, !torch.int -> !torch.vtensor<[1,128],f32>
%80 = torch.aten.unsqueeze %79, %int2 : !torch.vtensor<[1,128],f32>, !torch.int -> !torch.vtensor<[1,128,1],f32>
%81 = torch.aten.unsqueeze %80, %int3 : !torch.vtensor<[1,128,1],f32>, !torch.int -> !torch.vtensor<[1,128,1,1],f32>
%82 = torch.aten.unsqueeze %arg10, %int0 : !torch.vtensor<[128],f32>, !torch.int -> !torch.vtensor<[1,128],f32>
%83 = torch.aten.unsqueeze %82, %int2 : !torch.vtensor<[1,128],f32>, !torch.int -> !torch.vtensor<[1,128,1],f32>
%84 = torch.aten.unsqueeze %83, %int3 : !torch.vtensor<[1,128,1],f32>, !torch.int -> !torch.vtensor<[1,128,1,1],f32>
%85 = torch.aten.mul.Tensor %78, %84 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,1,1],f32> -> !torch.vtensor<[1,128,512,512],f32>
%86 = torch.aten.add.Tensor %85, %81, %int1 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,1,1],f32>, !torch.int -> !torch.vtensor<[1,128,512,512],f32>
%87 = torch.aten.sigmoid %86 : !torch.vtensor<[1,128,512,512],f32> -> !torch.vtensor<[1,128,512,512],f32>
%88 = torch.aten.mul.Tensor %87, %86 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,512,512],f32> -> !torch.vtensor<[1,128,512,512],f32>
%89 = torch.aten.convolution %88, %arg12, %arg13, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[128,128,3,3],f32>, !torch.vtensor<[128],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,128,512,512],f32>
%90 = torch.aten.view %89, %4 : !torch.vtensor<[1,128,512,512],f32>, !torch.list<int> -> !torch.vtensor<[1,32,4,262144],f32>
%91 = torch.aten.to.dtype %90, %int7, %false, %false, %none : !torch.vtensor<[1,32,4,262144],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,4,262144],f64>
%92 = torch.aten.sum.dim_IntList %91, %6, %true, %none : !torch.vtensor<[1,32,4,262144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%93 = torch.aten.div.Scalar %92, %int1048576 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%94 = torch.aten.sub.Tensor %91, %93, %float1.000000e00 : !torch.vtensor<[1,32,4,262144],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,4,262144],f64>
%95 = torch.aten.mul.Tensor %94, %94 : !torch.vtensor<[1,32,4,262144],f64>, !torch.vtensor<[1,32,4,262144],f64> -> !torch.vtensor<[1,32,4,262144],f64>
%96 = torch.aten.sum.dim_IntList %95, %6, %true, %none : !torch.vtensor<[1,32,4,262144],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%97 = torch.aten.div.Scalar %96, %int1048576 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%98 = torch.aten.to.dtype %97, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%99 = torch.aten.sum.dim_IntList %90, %6, %true, %none : !torch.vtensor<[1,32,4,262144],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%100 = torch.aten.div.Scalar %99, %int1048576 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%101 = torch.aten.add.Scalar %98, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%102 = torch.aten.rsqrt %101 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%103 = torch.aten.sub.Tensor %90, %100, %int1 : !torch.vtensor<[1,32,4,262144],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,4,262144],f32>
%104 = torch.aten.mul.Tensor %103, %102 : !torch.vtensor<[1,32,4,262144],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,4,262144],f32>
%105 = torch.aten.view %104, %21 : !torch.vtensor<[1,32,4,262144],f32>, !torch.list<int> -> !torch.vtensor<[1,128,512,512],f32>
%106 = torch.aten.unsqueeze %arg15, %int0 : !torch.vtensor<[128],f32>, !torch.int -> !torch.vtensor<[1,128],f32>
%107 = torch.aten.unsqueeze %106, %int2 : !torch.vtensor<[1,128],f32>, !torch.int -> !torch.vtensor<[1,128,1],f32>
%108 = torch.aten.unsqueeze %107, %int3 : !torch.vtensor<[1,128,1],f32>, !torch.int -> !torch.vtensor<[1,128,1,1],f32>
%109 = torch.aten.unsqueeze %arg14, %int0 : !torch.vtensor<[128],f32>, !torch.int -> !torch.vtensor<[1,128],f32>
%110 = torch.aten.unsqueeze %109, %int2 : !torch.vtensor<[1,128],f32>, !torch.int -> !torch.vtensor<[1,128,1],f32>
%111 = torch.aten.unsqueeze %110, %int3 : !torch.vtensor<[1,128,1],f32>, !torch.int -> !torch.vtensor<[1,128,1,1],f32>
%112 = torch.aten.mul.Tensor %105, %111 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,1,1],f32> -> !torch.vtensor<[1,128,512,512],f32>
%113 = torch.aten.add.Tensor %112, %108, %int1 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,1,1],f32>, !torch.int -> !torch.vtensor<[1,128,512,512],f32>
%114 = torch.aten.sigmoid %113 : !torch.vtensor<[1,128,512,512],f32> -> !torch.vtensor<[1,128,512,512],f32>
%115 = torch.aten.mul.Tensor %114, %113 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,512,512],f32> -> !torch.vtensor<[1,128,512,512],f32>
%116 = torch.aten.convolution %115, %arg16, %arg17, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[128,128,3,3],f32>, !torch.vtensor<[128],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,128,512,512],f32>
%117 = torch.aten.add.Tensor %62, %116, %int1 : !torch.vtensor<[1,128,512,512],f32>, !torch.vtensor<[1,128,512,512],f32>, !torch.int -> !torch.vtensor<[1,128,512,512],f32>
%118 = torch.aten.div.Scalar %117, %float1.000000e00 : !torch.vtensor<[1,128,512,512],f32>, !torch.float -> !torch.vtensor<[1,128,512,512],f32>
%119 = torch.prim.ListConstruct %int0, %int1, %int0, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%120 = torch.aten.constant_pad_nd %118, %119, %float0.000000e00 : !torch.vtensor<[1,128,512,512],f32>, !torch.list<int>, !torch.float -> !torch.vtensor<[1,128,513,513],f32>
%121 = torch.prim.ListConstruct %int2, %int2 : (!torch.int, !torch.int) -> !torch.list<int>
%122 = torch.aten.convolution %120, %arg18, %arg19, %121, %2, %1, %false, %2, %int1 : !torch.vtensor<[1,128,513,513],f32>, !torch.vtensor<[128,128,3,3],f32>, !torch.vtensor<[128],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,128,256,256],f32>
%123 = torch.prim.ListConstruct %int1, %int32, %int4, %int65536 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%124 = torch.aten.view %122, %123 : !torch.vtensor<[1,128,256,256],f32>, !torch.list<int> -> !torch.vtensor<[1,32,4,65536],f32>
%125 = torch.aten.to.dtype %124, %int7, %false, %false, %none : !torch.vtensor<[1,32,4,65536],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,4,65536],f64>
%126 = torch.aten.sum.dim_IntList %125, %6, %true, %none : !torch.vtensor<[1,32,4,65536],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%127 = torch.aten.div.Scalar %126, %int262144 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%128 = torch.aten.sub.Tensor %125, %127, %float1.000000e00 : !torch.vtensor<[1,32,4,65536],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,4,65536],f64>
%129 = torch.aten.mul.Tensor %128, %128 : !torch.vtensor<[1,32,4,65536],f64>, !torch.vtensor<[1,32,4,65536],f64> -> !torch.vtensor<[1,32,4,65536],f64>
%130 = torch.aten.sum.dim_IntList %129, %6, %true, %none : !torch.vtensor<[1,32,4,65536],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%131 = torch.aten.div.Scalar %130, %int262144 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%132 = torch.aten.to.dtype %131, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%133 = torch.aten.sum.dim_IntList %124, %6, %true, %none : !torch.vtensor<[1,32,4,65536],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%134 = torch.aten.div.Scalar %133, %int262144 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%135 = torch.aten.add.Scalar %132, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%136 = torch.aten.rsqrt %135 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%137 = torch.aten.sub.Tensor %124, %134, %int1 : !torch.vtensor<[1,32,4,65536],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,4,65536],f32>
%138 = torch.aten.mul.Tensor %137, %136 : !torch.vtensor<[1,32,4,65536],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,4,65536],f32>
%139 = torch.prim.ListConstruct %int1, %int128, %int256, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%140 = torch.aten.view %138, %139 : !torch.vtensor<[1,32,4,65536],f32>, !torch.list<int> -> !torch.vtensor<[1,128,256,256],f32>
%141 = torch.aten.unsqueeze %arg21, %int0 : !torch.vtensor<[128],f32>, !torch.int -> !torch.vtensor<[1,128],f32>
%142 = torch.aten.unsqueeze %141, %int2 : !torch.vtensor<[1,128],f32>, !torch.int -> !torch.vtensor<[1,128,1],f32>
%143 = torch.aten.unsqueeze %142, %int3 : !torch.vtensor<[1,128,1],f32>, !torch.int -> !torch.vtensor<[1,128,1,1],f32>
%144 = torch.aten.unsqueeze %arg20, %int0 : !torch.vtensor<[128],f32>, !torch.int -> !torch.vtensor<[1,128],f32>
%145 = torch.aten.unsqueeze %144, %int2 : !torch.vtensor<[1,128],f32>, !torch.int -> !torch.vtensor<[1,128,1],f32>
%146 = torch.aten.unsqueeze %145, %int3 : !torch.vtensor<[1,128,1],f32>, !torch.int -> !torch.vtensor<[1,128,1,1],f32>
%147 = torch.aten.mul.Tensor %140, %146 : !torch.vtensor<[1,128,256,256],f32>, !torch.vtensor<[1,128,1,1],f32> -> !torch.vtensor<[1,128,256,256],f32>
%148 = torch.aten.add.Tensor %147, %143, %int1 : !torch.vtensor<[1,128,256,256],f32>, !torch.vtensor<[1,128,1,1],f32>, !torch.int -> !torch.vtensor<[1,128,256,256],f32>
%149 = torch.aten.sigmoid %148 : !torch.vtensor<[1,128,256,256],f32> -> !torch.vtensor<[1,128,256,256],f32>
%150 = torch.aten.mul.Tensor %149, %148 : !torch.vtensor<[1,128,256,256],f32>, !torch.vtensor<[1,128,256,256],f32> -> !torch.vtensor<[1,128,256,256],f32>
%151 = torch.aten.convolution %150, %arg22, %arg23, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,128,256,256],f32>, !torch.vtensor<[256,128,3,3],f32>, !torch.vtensor<[256],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,256,256,256],f32>
%152 = torch.prim.ListConstruct %int1, %int32, %int8, %int65536 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%153 = torch.aten.view %151, %152 : !torch.vtensor<[1,256,256,256],f32>, !torch.list<int> -> !torch.vtensor<[1,32,8,65536],f32>
%154 = torch.aten.to.dtype %153, %int7, %false, %false, %none : !torch.vtensor<[1,32,8,65536],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,8,65536],f64>
%155 = torch.aten.sum.dim_IntList %154, %6, %true, %none : !torch.vtensor<[1,32,8,65536],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%156 = torch.aten.div.Scalar %155, %int524288 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%157 = torch.aten.sub.Tensor %154, %156, %float1.000000e00 : !torch.vtensor<[1,32,8,65536],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,8,65536],f64>
%158 = torch.aten.mul.Tensor %157, %157 : !torch.vtensor<[1,32,8,65536],f64>, !torch.vtensor<[1,32,8,65536],f64> -> !torch.vtensor<[1,32,8,65536],f64>
%159 = torch.aten.sum.dim_IntList %158, %6, %true, %none : !torch.vtensor<[1,32,8,65536],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%160 = torch.aten.div.Scalar %159, %int524288 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%161 = torch.aten.to.dtype %160, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%162 = torch.aten.sum.dim_IntList %153, %6, %true, %none : !torch.vtensor<[1,32,8,65536],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%163 = torch.aten.div.Scalar %162, %int524288 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%164 = torch.aten.add.Scalar %161, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%165 = torch.aten.rsqrt %164 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%166 = torch.aten.sub.Tensor %153, %163, %int1 : !torch.vtensor<[1,32,8,65536],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,8,65536],f32>
%167 = torch.aten.mul.Tensor %166, %165 : !torch.vtensor<[1,32,8,65536],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,8,65536],f32>
%168 = torch.prim.ListConstruct %int1, %int256, %int256, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%169 = torch.aten.view %167, %168 : !torch.vtensor<[1,32,8,65536],f32>, !torch.list<int> -> !torch.vtensor<[1,256,256,256],f32>
%170 = torch.aten.unsqueeze %arg25, %int0 : !torch.vtensor<[256],f32>, !torch.int -> !torch.vtensor<[1,256],f32>
%171 = torch.aten.unsqueeze %170, %int2 : !torch.vtensor<[1,256],f32>, !torch.int -> !torch.vtensor<[1,256,1],f32>
%172 = torch.aten.unsqueeze %171, %int3 : !torch.vtensor<[1,256,1],f32>, !torch.int -> !torch.vtensor<[1,256,1,1],f32>
%173 = torch.aten.unsqueeze %arg24, %int0 : !torch.vtensor<[256],f32>, !torch.int -> !torch.vtensor<[1,256],f32>
%174 = torch.aten.unsqueeze %173, %int2 : !torch.vtensor<[1,256],f32>, !torch.int -> !torch.vtensor<[1,256,1],f32>
%175 = torch.aten.unsqueeze %174, %int3 : !torch.vtensor<[1,256,1],f32>, !torch.int -> !torch.vtensor<[1,256,1,1],f32>
%176 = torch.aten.mul.Tensor %169, %175 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[1,256,1,1],f32> -> !torch.vtensor<[1,256,256,256],f32>
%177 = torch.aten.add.Tensor %176, %172, %int1 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[1,256,1,1],f32>, !torch.int -> !torch.vtensor<[1,256,256,256],f32>
%178 = torch.aten.sigmoid %177 : !torch.vtensor<[1,256,256,256],f32> -> !torch.vtensor<[1,256,256,256],f32>
%179 = torch.aten.mul.Tensor %178, %177 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[1,256,256,256],f32> -> !torch.vtensor<[1,256,256,256],f32>
%180 = torch.aten.convolution %179, %arg26, %arg27, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[256,256,3,3],f32>, !torch.vtensor<[256],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,256,256,256],f32>
%181 = torch.aten.convolution %122, %arg28, %arg29, %1, %2, %1, %false, %2, %int1 : !torch.vtensor<[1,128,256,256],f32>, !torch.vtensor<[256,128,1,1],f32>, !torch.vtensor<[256],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,256,256,256],f32>
%182 = torch.aten.add.Tensor %181, %180, %int1 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[1,256,256,256],f32>, !torch.int -> !torch.vtensor<[1,256,256,256],f32>
%183 = torch.aten.div.Scalar %182, %float1.000000e00 : !torch.vtensor<[1,256,256,256],f32>, !torch.float -> !torch.vtensor<[1,256,256,256],f32>
%184 = torch.aten.view %183, %152 : !torch.vtensor<[1,256,256,256],f32>, !torch.list<int> -> !torch.vtensor<[1,32,8,65536],f32>
%185 = torch.aten.to.dtype %184, %int7, %false, %false, %none : !torch.vtensor<[1,32,8,65536],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,8,65536],f64>
%186 = torch.aten.sum.dim_IntList %185, %6, %true, %none : !torch.vtensor<[1,32,8,65536],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%187 = torch.aten.div.Scalar %186, %int524288 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%188 = torch.aten.sub.Tensor %185, %187, %float1.000000e00 : !torch.vtensor<[1,32,8,65536],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,8,65536],f64>
%189 = torch.aten.mul.Tensor %188, %188 : !torch.vtensor<[1,32,8,65536],f64>, !torch.vtensor<[1,32,8,65536],f64> -> !torch.vtensor<[1,32,8,65536],f64>
%190 = torch.aten.sum.dim_IntList %189, %6, %true, %none : !torch.vtensor<[1,32,8,65536],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%191 = torch.aten.div.Scalar %190, %int524288 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%192 = torch.aten.to.dtype %191, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%193 = torch.aten.sum.dim_IntList %184, %6, %true, %none : !torch.vtensor<[1,32,8,65536],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%194 = torch.aten.div.Scalar %193, %int524288 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%195 = torch.aten.add.Scalar %192, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%196 = torch.aten.rsqrt %195 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%197 = torch.aten.sub.Tensor %184, %194, %int1 : !torch.vtensor<[1,32,8,65536],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,8,65536],f32>
%198 = torch.aten.mul.Tensor %197, %196 : !torch.vtensor<[1,32,8,65536],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,8,65536],f32>
%199 = torch.aten.view %198, %168 : !torch.vtensor<[1,32,8,65536],f32>, !torch.list<int> -> !torch.vtensor<[1,256,256,256],f32>
%200 = torch.aten.unsqueeze %arg31, %int0 : !torch.vtensor<[256],f32>, !torch.int -> !torch.vtensor<[1,256],f32>
%201 = torch.aten.unsqueeze %200, %int2 : !torch.vtensor<[1,256],f32>, !torch.int -> !torch.vtensor<[1,256,1],f32>
%202 = torch.aten.unsqueeze %201, %int3 : !torch.vtensor<[1,256,1],f32>, !torch.int -> !torch.vtensor<[1,256,1,1],f32>
%203 = torch.aten.unsqueeze %arg30, %int0 : !torch.vtensor<[256],f32>, !torch.int -> !torch.vtensor<[1,256],f32>
%204 = torch.aten.unsqueeze %203, %int2 : !torch.vtensor<[1,256],f32>, !torch.int -> !torch.vtensor<[1,256,1],f32>
%205 = torch.aten.unsqueeze %204, %int3 : !torch.vtensor<[1,256,1],f32>, !torch.int -> !torch.vtensor<[1,256,1,1],f32>
%206 = torch.aten.mul.Tensor %199, %205 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[1,256,1,1],f32> -> !torch.vtensor<[1,256,256,256],f32>
%207 = torch.aten.add.Tensor %206, %202, %int1 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[1,256,1,1],f32>, !torch.int -> !torch.vtensor<[1,256,256,256],f32>
%208 = torch.aten.sigmoid %207 : !torch.vtensor<[1,256,256,256],f32> -> !torch.vtensor<[1,256,256,256],f32>
%209 = torch.aten.mul.Tensor %208, %207 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[1,256,256,256],f32> -> !torch.vtensor<[1,256,256,256],f32>
%210 = torch.aten.convolution %209, %arg32, %arg33, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[256,256,3,3],f32>, !torch.vtensor<[256],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,256,256,256],f32>
%211 = torch.aten.view %210, %152 : !torch.vtensor<[1,256,256,256],f32>, !torch.list<int> -> !torch.vtensor<[1,32,8,65536],f32>
%212 = torch.aten.to.dtype %211, %int7, %false, %false, %none : !torch.vtensor<[1,32,8,65536],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,8,65536],f64>
%213 = torch.aten.sum.dim_IntList %212, %6, %true, %none : !torch.vtensor<[1,32,8,65536],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%214 = torch.aten.div.Scalar %213, %int524288 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%215 = torch.aten.sub.Tensor %212, %214, %float1.000000e00 : !torch.vtensor<[1,32,8,65536],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,8,65536],f64>
%216 = torch.aten.mul.Tensor %215, %215 : !torch.vtensor<[1,32,8,65536],f64>, !torch.vtensor<[1,32,8,65536],f64> -> !torch.vtensor<[1,32,8,65536],f64>
%217 = torch.aten.sum.dim_IntList %216, %6, %true, %none : !torch.vtensor<[1,32,8,65536],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%218 = torch.aten.div.Scalar %217, %int524288 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%219 = torch.aten.to.dtype %218, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%220 = torch.aten.sum.dim_IntList %211, %6, %true, %none : !torch.vtensor<[1,32,8,65536],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%221 = torch.aten.div.Scalar %220, %int524288 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%222 = torch.aten.add.Scalar %219, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%223 = torch.aten.rsqrt %222 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%224 = torch.aten.sub.Tensor %211, %221, %int1 : !torch.vtensor<[1,32,8,65536],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,8,65536],f32>
%225 = torch.aten.mul.Tensor %224, %223 : !torch.vtensor<[1,32,8,65536],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,8,65536],f32>
%226 = torch.aten.view %225, %168 : !torch.vtensor<[1,32,8,65536],f32>, !torch.list<int> -> !torch.vtensor<[1,256,256,256],f32>
%227 = torch.aten.unsqueeze %arg35, %int0 : !torch.vtensor<[256],f32>, !torch.int -> !torch.vtensor<[1,256],f32>
%228 = torch.aten.unsqueeze %227, %int2 : !torch.vtensor<[1,256],f32>, !torch.int -> !torch.vtensor<[1,256,1],f32>
%229 = torch.aten.unsqueeze %228, %int3 : !torch.vtensor<[1,256,1],f32>, !torch.int -> !torch.vtensor<[1,256,1,1],f32>
%230 = torch.aten.unsqueeze %arg34, %int0 : !torch.vtensor<[256],f32>, !torch.int -> !torch.vtensor<[1,256],f32>
%231 = torch.aten.unsqueeze %230, %int2 : !torch.vtensor<[1,256],f32>, !torch.int -> !torch.vtensor<[1,256,1],f32>
%232 = torch.aten.unsqueeze %231, %int3 : !torch.vtensor<[1,256,1],f32>, !torch.int -> !torch.vtensor<[1,256,1,1],f32>
%233 = torch.aten.mul.Tensor %226, %232 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[1,256,1,1],f32> -> !torch.vtensor<[1,256,256,256],f32>
%234 = torch.aten.add.Tensor %233, %229, %int1 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[1,256,1,1],f32>, !torch.int -> !torch.vtensor<[1,256,256,256],f32>
%235 = torch.aten.sigmoid %234 : !torch.vtensor<[1,256,256,256],f32> -> !torch.vtensor<[1,256,256,256],f32>
%236 = torch.aten.mul.Tensor %235, %234 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[1,256,256,256],f32> -> !torch.vtensor<[1,256,256,256],f32>
%237 = torch.aten.convolution %236, %arg36, %arg37, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[256,256,3,3],f32>, !torch.vtensor<[256],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,256,256,256],f32>
%238 = torch.aten.add.Tensor %183, %237, %int1 : !torch.vtensor<[1,256,256,256],f32>, !torch.vtensor<[1,256,256,256],f32>, !torch.int -> !torch.vtensor<[1,256,256,256],f32>
%239 = torch.aten.div.Scalar %238, %float1.000000e00 : !torch.vtensor<[1,256,256,256],f32>, !torch.float -> !torch.vtensor<[1,256,256,256],f32>
%240 = torch.aten.constant_pad_nd %239, %119, %float0.000000e00 : !torch.vtensor<[1,256,256,256],f32>, !torch.list<int>, !torch.float -> !torch.vtensor<[1,256,257,257],f32>
%241 = torch.aten.convolution %240, %arg38, %arg39, %121, %2, %1, %false, %2, %int1 : !torch.vtensor<[1,256,257,257],f32>, !torch.vtensor<[256,256,3,3],f32>, !torch.vtensor<[256],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,256,128,128],f32>
%242 = torch.prim.ListConstruct %int1, %int32, %int8, %int16384 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%243 = torch.aten.view %241, %242 : !torch.vtensor<[1,256,128,128],f32>, !torch.list<int> -> !torch.vtensor<[1,32,8,16384],f32>
%244 = torch.aten.to.dtype %243, %int7, %false, %false, %none : !torch.vtensor<[1,32,8,16384],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,8,16384],f64>
%245 = torch.aten.sum.dim_IntList %244, %6, %true, %none : !torch.vtensor<[1,32,8,16384],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%246 = torch.aten.div.Scalar %245, %int131072 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%247 = torch.aten.sub.Tensor %244, %246, %float1.000000e00 : !torch.vtensor<[1,32,8,16384],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,8,16384],f64>
%248 = torch.aten.mul.Tensor %247, %247 : !torch.vtensor<[1,32,8,16384],f64>, !torch.vtensor<[1,32,8,16384],f64> -> !torch.vtensor<[1,32,8,16384],f64>
%249 = torch.aten.sum.dim_IntList %248, %6, %true, %none : !torch.vtensor<[1,32,8,16384],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%250 = torch.aten.div.Scalar %249, %int131072 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%251 = torch.aten.to.dtype %250, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%252 = torch.aten.sum.dim_IntList %243, %6, %true, %none : !torch.vtensor<[1,32,8,16384],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%253 = torch.aten.div.Scalar %252, %int131072 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%254 = torch.aten.add.Scalar %251, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%255 = torch.aten.rsqrt %254 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%256 = torch.aten.sub.Tensor %243, %253, %int1 : !torch.vtensor<[1,32,8,16384],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,8,16384],f32>
%257 = torch.aten.mul.Tensor %256, %255 : !torch.vtensor<[1,32,8,16384],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,8,16384],f32>
%258 = torch.prim.ListConstruct %int1, %int256, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%259 = torch.aten.view %257, %258 : !torch.vtensor<[1,32,8,16384],f32>, !torch.list<int> -> !torch.vtensor<[1,256,128,128],f32>
%260 = torch.aten.unsqueeze %arg41, %int0 : !torch.vtensor<[256],f32>, !torch.int -> !torch.vtensor<[1,256],f32>
%261 = torch.aten.unsqueeze %260, %int2 : !torch.vtensor<[1,256],f32>, !torch.int -> !torch.vtensor<[1,256,1],f32>
%262 = torch.aten.unsqueeze %261, %int3 : !torch.vtensor<[1,256,1],f32>, !torch.int -> !torch.vtensor<[1,256,1,1],f32>
%263 = torch.aten.unsqueeze %arg40, %int0 : !torch.vtensor<[256],f32>, !torch.int -> !torch.vtensor<[1,256],f32>
%264 = torch.aten.unsqueeze %263, %int2 : !torch.vtensor<[1,256],f32>, !torch.int -> !torch.vtensor<[1,256,1],f32>
%265 = torch.aten.unsqueeze %264, %int3 : !torch.vtensor<[1,256,1],f32>, !torch.int -> !torch.vtensor<[1,256,1,1],f32>
%266 = torch.aten.mul.Tensor %259, %265 : !torch.vtensor<[1,256,128,128],f32>, !torch.vtensor<[1,256,1,1],f32> -> !torch.vtensor<[1,256,128,128],f32>
%267 = torch.aten.add.Tensor %266, %262, %int1 : !torch.vtensor<[1,256,128,128],f32>, !torch.vtensor<[1,256,1,1],f32>, !torch.int -> !torch.vtensor<[1,256,128,128],f32>
%268 = torch.aten.sigmoid %267 : !torch.vtensor<[1,256,128,128],f32> -> !torch.vtensor<[1,256,128,128],f32>
%269 = torch.aten.mul.Tensor %268, %267 : !torch.vtensor<[1,256,128,128],f32>, !torch.vtensor<[1,256,128,128],f32> -> !torch.vtensor<[1,256,128,128],f32>
%270 = torch.aten.convolution %269, %arg42, %arg43, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,256,128,128],f32>, !torch.vtensor<[512,256,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,128,128],f32>
%271 = torch.prim.ListConstruct %int1, %int32, %int16, %int16384 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%272 = torch.aten.view %270, %271 : !torch.vtensor<[1,512,128,128],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,16384],f32>
%273 = torch.aten.to.dtype %272, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,16384],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,16384],f64>
%274 = torch.aten.sum.dim_IntList %273, %6, %true, %none : !torch.vtensor<[1,32,16,16384],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%275 = torch.aten.div.Scalar %274, %int262144 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%276 = torch.aten.sub.Tensor %273, %275, %float1.000000e00 : !torch.vtensor<[1,32,16,16384],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,16384],f64>
%277 = torch.aten.mul.Tensor %276, %276 : !torch.vtensor<[1,32,16,16384],f64>, !torch.vtensor<[1,32,16,16384],f64> -> !torch.vtensor<[1,32,16,16384],f64>
%278 = torch.aten.sum.dim_IntList %277, %6, %true, %none : !torch.vtensor<[1,32,16,16384],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%279 = torch.aten.div.Scalar %278, %int262144 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%280 = torch.aten.to.dtype %279, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%281 = torch.aten.sum.dim_IntList %272, %6, %true, %none : !torch.vtensor<[1,32,16,16384],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%282 = torch.aten.div.Scalar %281, %int262144 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%283 = torch.aten.add.Scalar %280, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%284 = torch.aten.rsqrt %283 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%285 = torch.aten.sub.Tensor %272, %282, %int1 : !torch.vtensor<[1,32,16,16384],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,16384],f32>
%286 = torch.aten.mul.Tensor %285, %284 : !torch.vtensor<[1,32,16,16384],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,16384],f32>
%287 = torch.prim.ListConstruct %int1, %int512, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%288 = torch.aten.view %286, %287 : !torch.vtensor<[1,32,16,16384],f32>, !torch.list<int> -> !torch.vtensor<[1,512,128,128],f32>
%289 = torch.aten.unsqueeze %arg45, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%290 = torch.aten.unsqueeze %289, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%291 = torch.aten.unsqueeze %290, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%292 = torch.aten.unsqueeze %arg44, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%293 = torch.aten.unsqueeze %292, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%294 = torch.aten.unsqueeze %293, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%295 = torch.aten.mul.Tensor %288, %294 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,128,128],f32>
%296 = torch.aten.add.Tensor %295, %291, %int1 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,128,128],f32>
%297 = torch.aten.sigmoid %296 : !torch.vtensor<[1,512,128,128],f32> -> !torch.vtensor<[1,512,128,128],f32>
%298 = torch.aten.mul.Tensor %297, %296 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[1,512,128,128],f32> -> !torch.vtensor<[1,512,128,128],f32>
%299 = torch.aten.convolution %298, %arg46, %arg47, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[512,512,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,128,128],f32>
%300 = torch.aten.convolution %241, %arg48, %arg49, %1, %2, %1, %false, %2, %int1 : !torch.vtensor<[1,256,128,128],f32>, !torch.vtensor<[512,256,1,1],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,128,128],f32>
%301 = torch.aten.add.Tensor %300, %299, %int1 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[1,512,128,128],f32>, !torch.int -> !torch.vtensor<[1,512,128,128],f32>
%302 = torch.aten.div.Scalar %301, %float1.000000e00 : !torch.vtensor<[1,512,128,128],f32>, !torch.float -> !torch.vtensor<[1,512,128,128],f32>
%303 = torch.aten.view %302, %271 : !torch.vtensor<[1,512,128,128],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,16384],f32>
%304 = torch.aten.to.dtype %303, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,16384],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,16384],f64>
%305 = torch.aten.sum.dim_IntList %304, %6, %true, %none : !torch.vtensor<[1,32,16,16384],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%306 = torch.aten.div.Scalar %305, %int262144 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%307 = torch.aten.sub.Tensor %304, %306, %float1.000000e00 : !torch.vtensor<[1,32,16,16384],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,16384],f64>
%308 = torch.aten.mul.Tensor %307, %307 : !torch.vtensor<[1,32,16,16384],f64>, !torch.vtensor<[1,32,16,16384],f64> -> !torch.vtensor<[1,32,16,16384],f64>
%309 = torch.aten.sum.dim_IntList %308, %6, %true, %none : !torch.vtensor<[1,32,16,16384],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%310 = torch.aten.div.Scalar %309, %int262144 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%311 = torch.aten.to.dtype %310, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%312 = torch.aten.sum.dim_IntList %303, %6, %true, %none : !torch.vtensor<[1,32,16,16384],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%313 = torch.aten.div.Scalar %312, %int262144 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%314 = torch.aten.add.Scalar %311, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%315 = torch.aten.rsqrt %314 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%316 = torch.aten.sub.Tensor %303, %313, %int1 : !torch.vtensor<[1,32,16,16384],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,16384],f32>
%317 = torch.aten.mul.Tensor %316, %315 : !torch.vtensor<[1,32,16,16384],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,16384],f32>
%318 = torch.aten.view %317, %287 : !torch.vtensor<[1,32,16,16384],f32>, !torch.list<int> -> !torch.vtensor<[1,512,128,128],f32>
%319 = torch.aten.unsqueeze %arg51, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%320 = torch.aten.unsqueeze %319, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%321 = torch.aten.unsqueeze %320, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%322 = torch.aten.unsqueeze %arg50, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%323 = torch.aten.unsqueeze %322, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%324 = torch.aten.unsqueeze %323, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%325 = torch.aten.mul.Tensor %318, %324 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,128,128],f32>
%326 = torch.aten.add.Tensor %325, %321, %int1 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,128,128],f32>
%327 = torch.aten.sigmoid %326 : !torch.vtensor<[1,512,128,128],f32> -> !torch.vtensor<[1,512,128,128],f32>
%328 = torch.aten.mul.Tensor %327, %326 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[1,512,128,128],f32> -> !torch.vtensor<[1,512,128,128],f32>
%329 = torch.aten.convolution %328, %arg52, %arg53, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[512,512,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,128,128],f32>
%330 = torch.aten.view %329, %271 : !torch.vtensor<[1,512,128,128],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,16384],f32>
%331 = torch.aten.to.dtype %330, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,16384],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,16384],f64>
%332 = torch.aten.sum.dim_IntList %331, %6, %true, %none : !torch.vtensor<[1,32,16,16384],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%333 = torch.aten.div.Scalar %332, %int262144 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%334 = torch.aten.sub.Tensor %331, %333, %float1.000000e00 : !torch.vtensor<[1,32,16,16384],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,16384],f64>
%335 = torch.aten.mul.Tensor %334, %334 : !torch.vtensor<[1,32,16,16384],f64>, !torch.vtensor<[1,32,16,16384],f64> -> !torch.vtensor<[1,32,16,16384],f64>
%336 = torch.aten.sum.dim_IntList %335, %6, %true, %none : !torch.vtensor<[1,32,16,16384],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%337 = torch.aten.div.Scalar %336, %int262144 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%338 = torch.aten.to.dtype %337, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%339 = torch.aten.sum.dim_IntList %330, %6, %true, %none : !torch.vtensor<[1,32,16,16384],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%340 = torch.aten.div.Scalar %339, %int262144 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%341 = torch.aten.add.Scalar %338, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%342 = torch.aten.rsqrt %341 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%343 = torch.aten.sub.Tensor %330, %340, %int1 : !torch.vtensor<[1,32,16,16384],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,16384],f32>
%344 = torch.aten.mul.Tensor %343, %342 : !torch.vtensor<[1,32,16,16384],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,16384],f32>
%345 = torch.aten.view %344, %287 : !torch.vtensor<[1,32,16,16384],f32>, !torch.list<int> -> !torch.vtensor<[1,512,128,128],f32>
%346 = torch.aten.unsqueeze %arg55, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%347 = torch.aten.unsqueeze %346, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%348 = torch.aten.unsqueeze %347, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%349 = torch.aten.unsqueeze %arg54, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%350 = torch.aten.unsqueeze %349, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%351 = torch.aten.unsqueeze %350, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%352 = torch.aten.mul.Tensor %345, %351 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,128,128],f32>
%353 = torch.aten.add.Tensor %352, %348, %int1 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,128,128],f32>
%354 = torch.aten.sigmoid %353 : !torch.vtensor<[1,512,128,128],f32> -> !torch.vtensor<[1,512,128,128],f32>
%355 = torch.aten.mul.Tensor %354, %353 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[1,512,128,128],f32> -> !torch.vtensor<[1,512,128,128],f32>
%356 = torch.aten.convolution %355, %arg56, %arg57, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[512,512,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,128,128],f32>
%357 = torch.aten.add.Tensor %302, %356, %int1 : !torch.vtensor<[1,512,128,128],f32>, !torch.vtensor<[1,512,128,128],f32>, !torch.int -> !torch.vtensor<[1,512,128,128],f32>
%358 = torch.aten.div.Scalar %357, %float1.000000e00 : !torch.vtensor<[1,512,128,128],f32>, !torch.float -> !torch.vtensor<[1,512,128,128],f32>
%359 = torch.aten.constant_pad_nd %358, %119, %float0.000000e00 : !torch.vtensor<[1,512,128,128],f32>, !torch.list<int>, !torch.float -> !torch.vtensor<[1,512,129,129],f32>
%360 = torch.aten.convolution %359, %arg58, %arg59, %121, %2, %1, %false, %2, %int1 : !torch.vtensor<[1,512,129,129],f32>, !torch.vtensor<[512,512,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%361 = torch.prim.ListConstruct %int1, %int32, %int16, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%362 = torch.aten.view %360, %361 : !torch.vtensor<[1,512,64,64],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,4096],f32>
%363 = torch.aten.to.dtype %362, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,4096],f64>
%364 = torch.aten.sum.dim_IntList %363, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%365 = torch.aten.div.Scalar %364, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%366 = torch.aten.sub.Tensor %363, %365, %float1.000000e00 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,4096],f64>
%367 = torch.aten.mul.Tensor %366, %366 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,16,4096],f64> -> !torch.vtensor<[1,32,16,4096],f64>
%368 = torch.aten.sum.dim_IntList %367, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%369 = torch.aten.div.Scalar %368, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%370 = torch.aten.to.dtype %369, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%371 = torch.aten.sum.dim_IntList %362, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%372 = torch.aten.div.Scalar %371, %int65536 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%373 = torch.aten.add.Scalar %370, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%374 = torch.aten.rsqrt %373 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%375 = torch.aten.sub.Tensor %362, %372, %int1 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,4096],f32>
%376 = torch.aten.mul.Tensor %375, %374 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,4096],f32>
%377 = torch.prim.ListConstruct %int1, %int512, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%378 = torch.aten.view %376, %377 : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int> -> !torch.vtensor<[1,512,64,64],f32>
%379 = torch.aten.unsqueeze %arg61, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%380 = torch.aten.unsqueeze %379, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%381 = torch.aten.unsqueeze %380, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%382 = torch.aten.unsqueeze %arg60, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%383 = torch.aten.unsqueeze %382, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%384 = torch.aten.unsqueeze %383, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%385 = torch.aten.mul.Tensor %378, %384 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,64,64],f32>
%386 = torch.aten.add.Tensor %385, %381, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%387 = torch.aten.sigmoid %386 : !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%388 = torch.aten.mul.Tensor %387, %386 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%389 = torch.aten.convolution %388, %arg62, %arg63, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[512,512,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%390 = torch.aten.view %389, %361 : !torch.vtensor<[1,512,64,64],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,4096],f32>
%391 = torch.aten.to.dtype %390, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,4096],f64>
%392 = torch.aten.sum.dim_IntList %391, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%393 = torch.aten.div.Scalar %392, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%394 = torch.aten.sub.Tensor %391, %393, %float1.000000e00 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,4096],f64>
%395 = torch.aten.mul.Tensor %394, %394 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,16,4096],f64> -> !torch.vtensor<[1,32,16,4096],f64>
%396 = torch.aten.sum.dim_IntList %395, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%397 = torch.aten.div.Scalar %396, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%398 = torch.aten.to.dtype %397, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%399 = torch.aten.sum.dim_IntList %390, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%400 = torch.aten.div.Scalar %399, %int65536 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%401 = torch.aten.add.Scalar %398, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%402 = torch.aten.rsqrt %401 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%403 = torch.aten.sub.Tensor %390, %400, %int1 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,4096],f32>
%404 = torch.aten.mul.Tensor %403, %402 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,4096],f32>
%405 = torch.aten.view %404, %377 : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int> -> !torch.vtensor<[1,512,64,64],f32>
%406 = torch.aten.unsqueeze %arg65, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%407 = torch.aten.unsqueeze %406, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%408 = torch.aten.unsqueeze %407, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%409 = torch.aten.unsqueeze %arg64, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%410 = torch.aten.unsqueeze %409, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%411 = torch.aten.unsqueeze %410, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%412 = torch.aten.mul.Tensor %405, %411 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,64,64],f32>
%413 = torch.aten.add.Tensor %412, %408, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%414 = torch.aten.sigmoid %413 : !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%415 = torch.aten.mul.Tensor %414, %413 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%416 = torch.aten.convolution %415, %arg66, %arg67, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[512,512,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%417 = torch.aten.add.Tensor %360, %416, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%418 = torch.aten.div.Scalar %417, %float1.000000e00 : !torch.vtensor<[1,512,64,64],f32>, !torch.float -> !torch.vtensor<[1,512,64,64],f32>
%419 = torch.aten.view %418, %361 : !torch.vtensor<[1,512,64,64],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,4096],f32>
%420 = torch.aten.to.dtype %419, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,4096],f64>
%421 = torch.aten.sum.dim_IntList %420, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%422 = torch.aten.div.Scalar %421, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%423 = torch.aten.sub.Tensor %420, %422, %float1.000000e00 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,4096],f64>
%424 = torch.aten.mul.Tensor %423, %423 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,16,4096],f64> -> !torch.vtensor<[1,32,16,4096],f64>
%425 = torch.aten.sum.dim_IntList %424, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%426 = torch.aten.div.Scalar %425, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%427 = torch.aten.to.dtype %426, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%428 = torch.aten.sum.dim_IntList %419, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%429 = torch.aten.div.Scalar %428, %int65536 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%430 = torch.aten.add.Scalar %427, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%431 = torch.aten.rsqrt %430 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%432 = torch.aten.sub.Tensor %419, %429, %int1 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,4096],f32>
%433 = torch.aten.mul.Tensor %432, %431 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,4096],f32>
%434 = torch.aten.view %433, %377 : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int> -> !torch.vtensor<[1,512,64,64],f32>
%435 = torch.aten.unsqueeze %arg69, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%436 = torch.aten.unsqueeze %435, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%437 = torch.aten.unsqueeze %436, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%438 = torch.aten.unsqueeze %arg68, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%439 = torch.aten.unsqueeze %438, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%440 = torch.aten.unsqueeze %439, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%441 = torch.aten.mul.Tensor %434, %440 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,64,64],f32>
%442 = torch.aten.add.Tensor %441, %437, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%443 = torch.aten.sigmoid %442 : !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%444 = torch.aten.mul.Tensor %443, %442 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%445 = torch.aten.convolution %444, %arg70, %arg71, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[512,512,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%446 = torch.aten.view %445, %361 : !torch.vtensor<[1,512,64,64],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,4096],f32>
%447 = torch.aten.to.dtype %446, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,4096],f64>
%448 = torch.aten.sum.dim_IntList %447, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%449 = torch.aten.div.Scalar %448, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%450 = torch.aten.sub.Tensor %447, %449, %float1.000000e00 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,4096],f64>
%451 = torch.aten.mul.Tensor %450, %450 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,16,4096],f64> -> !torch.vtensor<[1,32,16,4096],f64>
%452 = torch.aten.sum.dim_IntList %451, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%453 = torch.aten.div.Scalar %452, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%454 = torch.aten.to.dtype %453, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%455 = torch.aten.sum.dim_IntList %446, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%456 = torch.aten.div.Scalar %455, %int65536 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%457 = torch.aten.add.Scalar %454, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%458 = torch.aten.rsqrt %457 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%459 = torch.aten.sub.Tensor %446, %456, %int1 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,4096],f32>
%460 = torch.aten.mul.Tensor %459, %458 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,4096],f32>
%461 = torch.aten.view %460, %377 : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int> -> !torch.vtensor<[1,512,64,64],f32>
%462 = torch.aten.unsqueeze %arg73, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%463 = torch.aten.unsqueeze %462, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%464 = torch.aten.unsqueeze %463, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%465 = torch.aten.unsqueeze %arg72, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%466 = torch.aten.unsqueeze %465, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%467 = torch.aten.unsqueeze %466, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%468 = torch.aten.mul.Tensor %461, %467 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,64,64],f32>
%469 = torch.aten.add.Tensor %468, %464, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%470 = torch.aten.sigmoid %469 : !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%471 = torch.aten.mul.Tensor %470, %469 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%472 = torch.aten.convolution %471, %arg74, %arg75, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[512,512,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%473 = torch.aten.add.Tensor %418, %472, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%474 = torch.aten.div.Scalar %473, %float1.000000e00 : !torch.vtensor<[1,512,64,64],f32>, !torch.float -> !torch.vtensor<[1,512,64,64],f32>
%475 = torch.aten.view %474, %361 : !torch.vtensor<[1,512,64,64],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,4096],f32>
%476 = torch.aten.to.dtype %475, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,4096],f64>
%477 = torch.aten.sum.dim_IntList %476, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%478 = torch.aten.div.Scalar %477, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%479 = torch.aten.sub.Tensor %476, %478, %float1.000000e00 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,4096],f64>
%480 = torch.aten.mul.Tensor %479, %479 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,16,4096],f64> -> !torch.vtensor<[1,32,16,4096],f64>
%481 = torch.aten.sum.dim_IntList %480, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%482 = torch.aten.div.Scalar %481, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%483 = torch.aten.to.dtype %482, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%484 = torch.aten.sum.dim_IntList %475, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%485 = torch.aten.div.Scalar %484, %int65536 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%486 = torch.aten.add.Scalar %483, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%487 = torch.aten.rsqrt %486 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%488 = torch.aten.sub.Tensor %475, %485, %int1 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,4096],f32>
%489 = torch.aten.mul.Tensor %488, %487 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,4096],f32>
%490 = torch.aten.view %489, %377 : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int> -> !torch.vtensor<[1,512,64,64],f32>
%491 = torch.aten.unsqueeze %arg77, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%492 = torch.aten.unsqueeze %491, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%493 = torch.aten.unsqueeze %492, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%494 = torch.aten.unsqueeze %arg76, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%495 = torch.aten.unsqueeze %494, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%496 = torch.aten.unsqueeze %495, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%497 = torch.aten.mul.Tensor %490, %496 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,64,64],f32>
%498 = torch.aten.add.Tensor %497, %493, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%499 = torch.aten.sigmoid %498 : !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%500 = torch.aten.mul.Tensor %499, %498 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%501 = torch.aten.convolution %500, %arg78, %arg79, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[512,512,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%502 = torch.aten.view %501, %361 : !torch.vtensor<[1,512,64,64],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,4096],f32>
%503 = torch.aten.to.dtype %502, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,4096],f64>
%504 = torch.aten.sum.dim_IntList %503, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%505 = torch.aten.div.Scalar %504, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%506 = torch.aten.sub.Tensor %503, %505, %float1.000000e00 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,4096],f64>
%507 = torch.aten.mul.Tensor %506, %506 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,16,4096],f64> -> !torch.vtensor<[1,32,16,4096],f64>
%508 = torch.aten.sum.dim_IntList %507, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%509 = torch.aten.div.Scalar %508, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%510 = torch.aten.to.dtype %509, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%511 = torch.aten.sum.dim_IntList %502, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%512 = torch.aten.div.Scalar %511, %int65536 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%513 = torch.aten.add.Scalar %510, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%514 = torch.aten.rsqrt %513 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%515 = torch.aten.sub.Tensor %502, %512, %int1 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,4096],f32>
%516 = torch.aten.mul.Tensor %515, %514 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,4096],f32>
%517 = torch.aten.view %516, %377 : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int> -> !torch.vtensor<[1,512,64,64],f32>
%518 = torch.aten.unsqueeze %arg81, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%519 = torch.aten.unsqueeze %518, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%520 = torch.aten.unsqueeze %519, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%521 = torch.aten.unsqueeze %arg80, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%522 = torch.aten.unsqueeze %521, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%523 = torch.aten.unsqueeze %522, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%524 = torch.aten.mul.Tensor %517, %523 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,64,64],f32>
%525 = torch.aten.add.Tensor %524, %520, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%526 = torch.aten.sigmoid %525 : !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%527 = torch.aten.mul.Tensor %526, %525 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%528 = torch.aten.convolution %527, %arg82, %arg83, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[512,512,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%529 = torch.aten.add.Tensor %474, %528, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%530 = torch.aten.div.Scalar %529, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%531 = torch.aten.view %530, %361 : !torch.vtensor<[1,512,64,64],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,4096],f32>
%532 = torch.aten.to.dtype %531, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,4096],f64>
%533 = torch.aten.sum.dim_IntList %532, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%534 = torch.aten.div.Scalar %533, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%535 = torch.aten.sub.Tensor %532, %534, %float1.000000e00 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,4096],f64>
%536 = torch.aten.mul.Tensor %535, %535 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,16,4096],f64> -> !torch.vtensor<[1,32,16,4096],f64>
%537 = torch.aten.sum.dim_IntList %536, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%538 = torch.aten.div.Scalar %537, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%539 = torch.aten.to.dtype %538, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%540 = torch.aten.sum.dim_IntList %531, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%541 = torch.aten.div.Scalar %540, %int65536 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%542 = torch.aten.add.Scalar %539, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%543 = torch.aten.rsqrt %542 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%544 = torch.aten.sub.Tensor %531, %541, %int1 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,4096],f32>
%545 = torch.aten.mul.Tensor %544, %543 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,4096],f32>
%546 = torch.aten.view %545, %377 : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int> -> !torch.vtensor<[1,512,64,64],f32>
%547 = torch.aten.unsqueeze %arg85, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%548 = torch.aten.unsqueeze %547, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%549 = torch.aten.unsqueeze %548, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%550 = torch.aten.unsqueeze %arg84, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%551 = torch.aten.unsqueeze %550, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%552 = torch.aten.unsqueeze %551, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%553 = torch.aten.mul.Tensor %546, %552 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,64,64],f32>
%554 = torch.aten.add.Tensor %553, %549, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%555 = torch.prim.ListConstruct %int1, %int512, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%556 = torch.aten.view %554, %555 : !torch.vtensor<[1,512,64,64],f32>, !torch.list<int> -> !torch.vtensor<[1,512,4096],f32>
%557 = torch.aten.transpose.int %556, %int1, %int2 : !torch.vtensor<[1,512,4096],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,4096,512],f32>
%558 = torch.aten.transpose.int %arg86, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%559 = torch.prim.ListConstruct %int1, %int4096, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%560 = torch.aten.broadcast_to %557, %559 : !torch.vtensor<[1,4096,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4096,512],f32>
%561 = torch.aten.view %560, %559 : !torch.vtensor<[1,4096,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4096,512],f32>
%562 = torch.prim.ListConstruct %int1, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%563 = torch.aten.broadcast_to %558, %562 : !torch.vtensor<[512,512],f32>, !torch.list<int> -> !torch.vtensor<[1,512,512],f32>
%564 = torch.aten.view %563, %562 : !torch.vtensor<[1,512,512],f32>, !torch.list<int> -> !torch.vtensor<[1,512,512],f32>
%565 = torch.aten.bmm %561, %564 : !torch.vtensor<[1,4096,512],f32>, !torch.vtensor<[1,512,512],f32> -> !torch.vtensor<[1,4096,512],f32>
%566 = torch.aten.view %565, %559 : !torch.vtensor<[1,4096,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4096,512],f32>
%567 = torch.aten.add.Tensor %566, %arg87, %int1 : !torch.vtensor<[1,4096,512],f32>, !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,4096,512],f32>
%568 = torch.aten.transpose.int %arg88, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%569 = torch.aten.broadcast_to %557, %559 : !torch.vtensor<[1,4096,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4096,512],f32>
%570 = torch.aten.view %569, %559 : !torch.vtensor<[1,4096,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4096,512],f32>
%571 = torch.aten.broadcast_to %568, %562 : !torch.vtensor<[512,512],f32>, !torch.list<int> -> !torch.vtensor<[1,512,512],f32>
%572 = torch.aten.view %571, %562 : !torch.vtensor<[1,512,512],f32>, !torch.list<int> -> !torch.vtensor<[1,512,512],f32>
%573 = torch.aten.bmm %570, %572 : !torch.vtensor<[1,4096,512],f32>, !torch.vtensor<[1,512,512],f32> -> !torch.vtensor<[1,4096,512],f32>
%574 = torch.aten.view %573, %559 : !torch.vtensor<[1,4096,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4096,512],f32>
%575 = torch.aten.add.Tensor %574, %arg89, %int1 : !torch.vtensor<[1,4096,512],f32>, !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,4096,512],f32>
%576 = torch.aten.transpose.int %arg90, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%577 = torch.aten.broadcast_to %557, %559 : !torch.vtensor<[1,4096,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4096,512],f32>
%578 = torch.aten.view %577, %559 : !torch.vtensor<[1,4096,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4096,512],f32>
%579 = torch.aten.broadcast_to %576, %562 : !torch.vtensor<[512,512],f32>, !torch.list<int> -> !torch.vtensor<[1,512,512],f32>
%580 = torch.aten.view %579, %562 : !torch.vtensor<[1,512,512],f32>, !torch.list<int> -> !torch.vtensor<[1,512,512],f32>
%581 = torch.aten.bmm %578, %580 : !torch.vtensor<[1,4096,512],f32>, !torch.vtensor<[1,512,512],f32> -> !torch.vtensor<[1,4096,512],f32>
%582 = torch.aten.view %581, %559 : !torch.vtensor<[1,4096,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4096,512],f32>
%583 = torch.aten.add.Tensor %582, %arg91, %int1 : !torch.vtensor<[1,4096,512],f32>, !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,4096,512],f32>
%584 = torch.prim.ListConstruct %int1, %int4096, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%585 = torch.aten.empty.memory_format %584, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[1,4096,4096],f32>
%586 = torch.aten.transpose.int %575, %int-1, %int-2 : !torch.vtensor<[1,4096,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,512,4096],f32>
%587 = torch.aten.bmm %567, %586 : !torch.vtensor<[1,4096,512],f32>, !torch.vtensor<[1,512,4096],f32> -> !torch.vtensor<[1,4096,4096],f32>
%588 = torch.aten.mul.Scalar %587, %float4.419420e-02 : !torch.vtensor<[1,4096,4096],f32>, !torch.float -> !torch.vtensor<[1,4096,4096],f32>
%589 = torch.aten.add.Tensor %588, %585, %int0 : !torch.vtensor<[1,4096,4096],f32>, !torch.vtensor<[1,4096,4096],f32>, !torch.int -> !torch.vtensor<[1,4096,4096],f32>
%values, %indices = torch.aten.max.dim %589, %int-1, %true : !torch.vtensor<[1,4096,4096],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,4096,1],f32>, !torch.vtensor<[1,4096,1],si64>
%590 = torch.aten.sub.Tensor %589, %values, %float1.000000e00 : !torch.vtensor<[1,4096,4096],f32>, !torch.vtensor<[1,4096,1],f32>, !torch.float -> !torch.vtensor<[1,4096,4096],f32>
%591 = torch.aten.exp %590 : !torch.vtensor<[1,4096,4096],f32> -> !torch.vtensor<[1,4096,4096],f32>
%592 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%593 = torch.aten.sum.dim_IntList %591, %592, %true, %none : !torch.vtensor<[1,4096,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,4096,1],f32>
%594 = torch.aten.div.Tensor %591, %593 : !torch.vtensor<[1,4096,4096],f32>, !torch.vtensor<[1,4096,1],f32> -> !torch.vtensor<[1,4096,4096],f32>
%595 = torch.aten.bmm %594, %583 : !torch.vtensor<[1,4096,4096],f32>, !torch.vtensor<[1,4096,512],f32> -> !torch.vtensor<[1,4096,512],f32>
%596 = torch.aten.transpose.int %arg92, %int0, %int1 : !torch.vtensor<[512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[512,512],f32>
%597 = torch.prim.ListConstruct %int4096, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%598 = torch.aten.view %595, %597 : !torch.vtensor<[1,4096,512],f32>, !torch.list<int> -> !torch.vtensor<[4096,512],f32>
%599 = torch.aten.mm %598, %596 : !torch.vtensor<[4096,512],f32>, !torch.vtensor<[512,512],f32> -> !torch.vtensor<[4096,512],f32>
%600 = torch.aten.mul.Scalar %arg93, %int1 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[512],f32>
%601 = torch.aten.add.Tensor %600, %599, %int1 : !torch.vtensor<[512],f32>, !torch.vtensor<[4096,512],f32>, !torch.int -> !torch.vtensor<[4096,512],f32>
%602 = torch.aten.view %601, %559 : !torch.vtensor<[4096,512],f32>, !torch.list<int> -> !torch.vtensor<[1,4096,512],f32>
%603 = torch.aten.transpose.int %602, %int-1, %int-2 : !torch.vtensor<[1,4096,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,512,4096],f32>
%604 = torch.aten.view %603, %377 : !torch.vtensor<[1,512,4096],f32>, !torch.list<int> -> !torch.vtensor<[1,512,64,64],f32>
%605 = torch.aten.add.Tensor %604, %530, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%606 = torch.aten.div.Scalar %605, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%607 = torch.aten.clone %606, %int2 : !torch.vtensor<[1,512,64,64],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%608 = torch.aten.view %607, %361 : !torch.vtensor<[1,512,64,64],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,4096],f32>
%609 = torch.aten.to.dtype %608, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,4096],f64>
%610 = torch.aten.sum.dim_IntList %609, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%611 = torch.aten.div.Scalar %610, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%612 = torch.aten.sub.Tensor %609, %611, %float1.000000e00 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,4096],f64>
%613 = torch.aten.mul.Tensor %612, %612 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,16,4096],f64> -> !torch.vtensor<[1,32,16,4096],f64>
%614 = torch.aten.sum.dim_IntList %613, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%615 = torch.aten.div.Scalar %614, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%616 = torch.aten.to.dtype %615, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%617 = torch.aten.sum.dim_IntList %608, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%618 = torch.aten.div.Scalar %617, %int65536 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%619 = torch.aten.add.Scalar %616, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%620 = torch.aten.rsqrt %619 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%621 = torch.aten.sub.Tensor %608, %618, %int1 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,4096],f32>
%622 = torch.aten.mul.Tensor %621, %620 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,4096],f32>
%623 = torch.aten.view %622, %377 : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int> -> !torch.vtensor<[1,512,64,64],f32>
%624 = torch.aten.unsqueeze %arg95, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%625 = torch.aten.unsqueeze %624, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%626 = torch.aten.unsqueeze %625, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%627 = torch.aten.unsqueeze %arg94, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%628 = torch.aten.unsqueeze %627, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%629 = torch.aten.unsqueeze %628, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%630 = torch.aten.mul.Tensor %623, %629 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,64,64],f32>
%631 = torch.aten.add.Tensor %630, %626, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%632 = torch.aten.sigmoid %631 : !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%633 = torch.aten.mul.Tensor %632, %631 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%634 = torch.aten.convolution %633, %arg96, %arg97, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[512,512,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%635 = torch.aten.clone %634, %int2 : !torch.vtensor<[1,512,64,64],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%636 = torch.aten.view %635, %361 : !torch.vtensor<[1,512,64,64],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,4096],f32>
%637 = torch.aten.to.dtype %636, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,4096],f64>
%638 = torch.aten.sum.dim_IntList %637, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%639 = torch.aten.div.Scalar %638, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%640 = torch.aten.sub.Tensor %637, %639, %float1.000000e00 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,4096],f64>
%641 = torch.aten.mul.Tensor %640, %640 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,16,4096],f64> -> !torch.vtensor<[1,32,16,4096],f64>
%642 = torch.aten.sum.dim_IntList %641, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%643 = torch.aten.div.Scalar %642, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%644 = torch.aten.to.dtype %643, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%645 = torch.aten.sum.dim_IntList %636, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%646 = torch.aten.div.Scalar %645, %int65536 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%647 = torch.aten.add.Scalar %644, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%648 = torch.aten.rsqrt %647 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%649 = torch.aten.sub.Tensor %636, %646, %int1 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,4096],f32>
%650 = torch.aten.mul.Tensor %649, %648 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,4096],f32>
%651 = torch.aten.view %650, %377 : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int> -> !torch.vtensor<[1,512,64,64],f32>
%652 = torch.aten.unsqueeze %arg99, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%653 = torch.aten.unsqueeze %652, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%654 = torch.aten.unsqueeze %653, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%655 = torch.aten.unsqueeze %arg98, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%656 = torch.aten.unsqueeze %655, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%657 = torch.aten.unsqueeze %656, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%658 = torch.aten.mul.Tensor %651, %657 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,64,64],f32>
%659 = torch.aten.add.Tensor %658, %654, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%660 = torch.aten.sigmoid %659 : !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%661 = torch.aten.mul.Tensor %660, %659 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%662 = torch.aten.convolution %661, %arg100, %arg101, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[512,512,3,3],f32>, !torch.vtensor<[512],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%663 = torch.aten.add.Tensor %606, %662, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%664 = torch.aten.div.Scalar %663, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%665 = torch.aten.clone %664, %int2 : !torch.vtensor<[1,512,64,64],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%666 = torch.aten.view %665, %361 : !torch.vtensor<[1,512,64,64],f32>, !torch.list<int> -> !torch.vtensor<[1,32,16,4096],f32>
%667 = torch.aten.to.dtype %666, %int7, %false, %false, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,16,4096],f64>
%668 = torch.aten.sum.dim_IntList %667, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%669 = torch.aten.div.Scalar %668, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%670 = torch.aten.sub.Tensor %667, %669, %float1.000000e00 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,1,1],f64>, !torch.float -> !torch.vtensor<[1,32,16,4096],f64>
%671 = torch.aten.mul.Tensor %670, %670 : !torch.vtensor<[1,32,16,4096],f64>, !torch.vtensor<[1,32,16,4096],f64> -> !torch.vtensor<[1,32,16,4096],f64>
%672 = torch.aten.sum.dim_IntList %671, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f64>
%673 = torch.aten.div.Scalar %672, %int65536 : !torch.vtensor<[1,32,1,1],f64>, !torch.int -> !torch.vtensor<[1,32,1,1],f64>
%674 = torch.aten.to.dtype %673, %int6, %false, %false, %none : !torch.vtensor<[1,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%675 = torch.aten.sum.dim_IntList %666, %6, %true, %none : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,32,1,1],f32>
%676 = torch.aten.div.Scalar %675, %int65536 : !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%677 = torch.aten.add.Scalar %674, %float9.999990e-07, %int1 : !torch.vtensor<[1,32,1,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,32,1,1],f32>
%678 = torch.aten.rsqrt %677 : !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,1,1],f32>
%679 = torch.aten.sub.Tensor %666, %676, %int1 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32>, !torch.int -> !torch.vtensor<[1,32,16,4096],f32>
%680 = torch.aten.mul.Tensor %679, %678 : !torch.vtensor<[1,32,16,4096],f32>, !torch.vtensor<[1,32,1,1],f32> -> !torch.vtensor<[1,32,16,4096],f32>
%681 = torch.aten.view %680, %377 : !torch.vtensor<[1,32,16,4096],f32>, !torch.list<int> -> !torch.vtensor<[1,512,64,64],f32>
%682 = torch.aten.unsqueeze %arg103, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%683 = torch.aten.unsqueeze %682, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%684 = torch.aten.unsqueeze %683, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%685 = torch.aten.unsqueeze %arg102, %int0 : !torch.vtensor<[512],f32>, !torch.int -> !torch.vtensor<[1,512],f32>
%686 = torch.aten.unsqueeze %685, %int2 : !torch.vtensor<[1,512],f32>, !torch.int -> !torch.vtensor<[1,512,1],f32>
%687 = torch.aten.unsqueeze %686, %int3 : !torch.vtensor<[1,512,1],f32>, !torch.int -> !torch.vtensor<[1,512,1,1],f32>
%688 = torch.aten.mul.Tensor %681, %687 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32> -> !torch.vtensor<[1,512,64,64],f32>
%689 = torch.aten.add.Tensor %688, %684, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,1,1],f32>, !torch.int -> !torch.vtensor<[1,512,64,64],f32>
%690 = torch.aten.sigmoid %689 : !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%691 = torch.aten.mul.Tensor %690, %689 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[1,512,64,64],f32> -> !torch.vtensor<[1,512,64,64],f32>
%692 = torch.aten.convolution %691, %arg104, %arg105, %1, %1, %1, %false, %2, %int1 : !torch.vtensor<[1,512,64,64],f32>, !torch.vtensor<[8,512,3,3],f32>, !torch.vtensor<[8],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,8,64,64],f32>
%693 = torch.aten.convolution %692, %arg106, %arg107, %1, %2, %1, %false, %2, %int1 : !torch.vtensor<[1,8,64,64],f32>, !torch.vtensor<[8,8,1,1],f32>, !torch.vtensor<[8],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,8,64,64],f32>
%694 = torch.aten.slice.Tensor %693, %int1, %int0, %int4, %int1 : !torch.vtensor<[1,8,64,64],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,4,64,64],f32>
%695 = torch.aten.slice.Tensor %693, %int1, %int4, %int8, %int1 : !torch.vtensor<[1,8,64,64],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,4,64,64],f32>
%696 = torch.aten.clamp %695, %float-3.000000e01, %float2.000000e01 : !torch.vtensor<[1,4,64,64],f32>, !torch.float, !torch.float -> !torch.vtensor<[1,4,64,64],f32>
%697 = torch.aten.mul.Scalar %696, %float5.000000e-01 : !torch.vtensor<[1,4,64,64],f32>, !torch.float -> !torch.vtensor<[1,4,64,64],f32>
%698 = torch.aten.exp %697 : !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%699 = torch.prim.ListConstruct %int1, %int4, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%700 = torch.aten.empty.memory_format %699, %none, %none, %cpu, %false, %none : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[1,4,64,64],f32>
%701 = torch.aten.empty.memory_format %699, %none, %none, %cpu, %false, %none : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[1,4,64,64],f32>
%702 = torch.aten.uniform %700, %float0.000000e00, %float1.000000e00, %none : !torch.vtensor<[1,4,64,64],f32>, !torch.float, !torch.float, !torch.none -> !torch.vtensor<[1,4,64,64],f32>
%703 = torch.aten.uniform %701, %float0.000000e00, %float1.000000e00, %none : !torch.vtensor<[1,4,64,64],f32>, !torch.float, !torch.float, !torch.none -> !torch.vtensor<[1,4,64,64],f32>
%704 = torch.aten.log %702 : !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%705 = torch.aten.mul.Scalar %704, %float-2.000000e00 : !torch.vtensor<[1,4,64,64],f32>, !torch.float -> !torch.vtensor<[1,4,64,64],f32>
%706 = torch.aten.sqrt %705 : !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%707 = torch.aten.mul.Scalar %703, %float6.283180e00 : !torch.vtensor<[1,4,64,64],f32>, !torch.float -> !torch.vtensor<[1,4,64,64],f32>
%708 = torch.aten.cos %707 : !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%709 = torch.aten.mul.Tensor %706, %708 : !torch.vtensor<[1,4,64,64],f32>, !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%710 = torch.aten.mul.Tensor %698, %709 : !torch.vtensor<[1,4,64,64],f32>, !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%711 = torch.aten.add.Tensor %694, %710, %int1 : !torch.vtensor<[1,4,64,64],f32>, !torch.vtensor<[1,4,64,64],f32>, !torch.int -> !torch.vtensor<[1,4,64,64],f32>
%712 = torch.aten.detach %711 : !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%713 = torch.aten.mul.Scalar %712, %float1.821500e-01 : !torch.vtensor<[1,4,64,64],f32>, !torch.float -> !torch.vtensor<[1,4,64,64],f32>
%714 = torch.aten.empty.memory_format %699, %none, %none, %cpu, %false, %none : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[1,4,64,64],f32>
%715 = torch.aten.empty.memory_format %699, %none, %none, %cpu, %false, %none : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[1,4,64,64],f32>
%716 = torch.aten.uniform %714, %float0.000000e00, %float1.000000e00, %none : !torch.vtensor<[1,4,64,64],f32>, !torch.float, !torch.float, !torch.none -> !torch.vtensor<[1,4,64,64],f32>
%717 = torch.aten.uniform %715, %float0.000000e00, %float1.000000e00, %none : !torch.vtensor<[1,4,64,64],f32>, !torch.float, !torch.float, !torch.none -> !torch.vtensor<[1,4,64,64],f32>
%718 = torch.aten.log %716 : !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%719 = torch.aten.mul.Scalar %718, %float-2.000000e00 : !torch.vtensor<[1,4,64,64],f32>, !torch.float -> !torch.vtensor<[1,4,64,64],f32>
%720 = torch.aten.sqrt %719 : !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%721 = torch.aten.mul.Scalar %717, %float6.283180e00 : !torch.vtensor<[1,4,64,64],f32>, !torch.float -> !torch.vtensor<[1,4,64,64],f32>
%722 = torch.aten.cos %721 : !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%723 = torch.aten.mul.Tensor %720, %722 : !torch.vtensor<[1,4,64,64],f32>, !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%724 = torch.prim.ListConstruct %int1 : (!torch.int) -> !torch.list<int>
%725 = torch.aten.empty.memory_format %724, %none, %none, %cpu, %false, %none : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[1],f32>
%726 = torch.aten.uniform %725, %float0.000000e00, %float1.000000e03, %none : !torch.vtensor<[1],f32>, !torch.float, !torch.float, !torch.none -> !torch.vtensor<[1],f32>
%727 = torch.aten.to.dtype %726, %int4, %false, %false, %none : !torch.vtensor<[1],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1],si64>
%728 = torch.prim.ListConstruct %727 : (!torch.vtensor<[1],si64>) -> !torch.list<vtensor>
%729 = torch.aten.index.Tensor %arg307, %728 : !torch.vtensor<[1000],f32>, !torch.list<vtensor> -> !torch.vtensor<[1],f32>
%730 = torch.aten.pow.Tensor_Scalar %729, %float5.000000e-01 : !torch.vtensor<[1],f32>, !torch.float -> !torch.vtensor<[1],f32>
%731 = torch.aten.unsqueeze %730, %int-1 : !torch.vtensor<[1],f32>, !torch.int -> !torch.vtensor<[1,1],f32>
%732 = torch.aten.unsqueeze %731, %int-1 : !torch.vtensor<[1,1],f32>, !torch.int -> !torch.vtensor<[1,1,1],f32>
%733 = torch.aten.unsqueeze %732, %int-1 : !torch.vtensor<[1,1,1],f32>, !torch.int -> !torch.vtensor<[1,1,1,1],f32>
%734 = torch.aten.index.Tensor %arg307, %728 : !torch.vtensor<[1000],f32>, !torch.list<vtensor> -> !torch.vtensor<[1],f32>
%735 = torch.aten.rsub.Scalar %734, %int1, %int1 : !torch.vtensor<[1],f32>, !torch.int, !torch.int -> !torch.vtensor<[1],f32>
%736 = torch.aten.pow.Tensor_Scalar %735, %float5.000000e-01 : !torch.vtensor<[1],f32>, !torch.float -> !torch.vtensor<[1],f32>
%737 = torch.aten.unsqueeze %736, %int-1 : !torch.vtensor<[1],f32>, !torch.int -> !torch.vtensor<[1,1],f32>
%738 = torch.aten.unsqueeze %737, %int-1 : !torch.vtensor<[1,1],f32>, !torch.int -> !torch.vtensor<[1,1,1],f32>
%739 = torch.aten.unsqueeze %738, %int-1 : !torch.vtensor<[1,1,1],f32>, !torch.int -> !torch.vtensor<[1,1,1,1],f32>
%740 = torch.aten.mul.Tensor %733, %713 : !torch.vtensor<[1,1,1,1],f32>, !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%741 = torch.aten.mul.Tensor %739, %723 : !torch.vtensor<[1,1,1,1],f32>, !torch.vtensor<[1,4,64,64],f32> -> !torch.vtensor<[1,4,64,64],f32>
%742 = torch.aten.add.Tensor %740, %741, %int1 : !torch.vtensor<[1,4,64,64],f32>, !torch.vtensor<[1,4,64,64],f32>, !torch.int -> !torch.vtensor<[1,4,64,64],f32>
%743 = torch.prim.ListConstruct %int-1, %int77 : (!torch.int, !torch.int) -> !torch.list<int>
%744 = torch.aten.view %arg305, %743 : !torch.vtensor<[1,77],si64>, !torch.list<int> -> !torch.vtensor<[1,77],si64>
%745 = torch.aten.embedding %arg108, %744, %int-1, %false, %false : !torch.vtensor<[49409,768],f32>, !torch.vtensor<[1,77],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,77,768],f32>
%746 = torch.aten.embedding %arg109, %arg304, %int-1, %false, %false : !torch.vtensor<[77,768],f32>, !torch.vtensor<[1,77],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,77,768],f32>
%747 = torch.aten.add.Tensor %745, %746, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%748 = torch.prim.ListConstruct %int1, %int77, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%749 = torch.aten.empty.memory_format %748, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[1,77,77],f32>
%750 = torch.aten.clone %0, %none : !torch.vtensor<[],f32>, !torch.none -> !torch.vtensor<[],f32>
%751 = torch.aten.fill.Tensor %749, %750 : !torch.vtensor<[1,77,77],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,77,77],f32>
%752 = torch.aten.triu %751, %int1 : !torch.vtensor<[1,77,77],f32>, !torch.int -> !torch.vtensor<[1,77,77],f32>
%753 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%754 = torch.aten.sum.dim_IntList %747, %753, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%755 = torch.aten.div.Scalar %754, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%756 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%757 = torch.aten.broadcast_to %755, %756 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%758 = torch.aten.sub.Tensor %747, %757, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%759 = torch.aten.mul.Tensor %758, %758 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%760 = torch.aten.sum.dim_IntList %759, %753, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%761 = torch.aten.div.Scalar %760, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%762 = torch.aten.add.Scalar %761, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%763 = torch.aten.rsqrt %762 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%764 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%765 = torch.aten.broadcast_to %763, %764 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%766 = torch.aten.mul.Tensor %758, %765 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%767 = torch.aten.mul.Tensor %766, %arg110 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%768 = torch.aten.add.Tensor %767, %arg111, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%769 = torch.aten.transpose.int %arg112, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%770 = torch.prim.ListConstruct %int77, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%771 = torch.aten.view %768, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%772 = torch.aten.mm %771, %769 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%773 = torch.aten.mul.Scalar %arg113, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%774 = torch.aten.add.Tensor %773, %772, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%775 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%776 = torch.aten.view %774, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%777 = torch.aten.mul.Scalar %776, %float1.250000e-01 : !torch.vtensor<[1,77,768],f32>, !torch.float -> !torch.vtensor<[1,77,768],f32>
%778 = torch.aten.transpose.int %arg114, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%779 = torch.aten.view %768, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%780 = torch.aten.mm %779, %778 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%781 = torch.aten.mul.Scalar %arg115, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%782 = torch.aten.add.Tensor %781, %780, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%783 = torch.aten.view %782, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%784 = torch.prim.ListConstruct %int1, %int-1, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%785 = torch.aten.view %783, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%786 = torch.aten.transpose.int %785, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%787 = torch.aten.clone %786, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%788 = torch.aten.transpose.int %arg116, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%789 = torch.aten.view %768, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%790 = torch.aten.mm %789, %788 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%791 = torch.aten.mul.Scalar %arg117, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%792 = torch.aten.add.Tensor %791, %790, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%793 = torch.aten.view %792, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%794 = torch.aten.view %793, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%795 = torch.aten.transpose.int %794, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%796 = torch.aten.clone %795, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%797 = torch.prim.ListConstruct %int1, %int77, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%798 = torch.aten.view %777, %797 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%799 = torch.aten.transpose.int %798, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%800 = torch.aten.clone %799, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%801 = torch.prim.ListConstruct %int12, %int-1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%802 = torch.aten.view %800, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%803 = torch.aten.view %787, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%804 = torch.aten.view %796, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%805 = torch.aten.transpose.int %803, %int1, %int2 : !torch.vtensor<[12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[12,64,77],f32>
%806 = torch.aten.bmm %802, %805 : !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32> -> !torch.vtensor<[12,77,77],f32>
%807 = torch.prim.ListConstruct %int1, %int12, %int77, %int77 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%808 = torch.aten.view %806, %807 : !torch.vtensor<[12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,77],f32>
%809 = torch.aten.unsqueeze %752, %int1 : !torch.vtensor<[1,77,77],f32>, !torch.int -> !torch.vtensor<[1,1,77,77],f32>
%810 = torch.aten.add.Tensor %808, %809, %int1 : !torch.vtensor<[1,12,77,77],f32>, !torch.vtensor<[1,1,77,77],f32>, !torch.int -> !torch.vtensor<[1,12,77,77],f32>
%811 = torch.prim.ListConstruct %int12, %int77, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%812 = torch.aten.view %810, %811 : !torch.vtensor<[1,12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[12,77,77],f32>
%values_0, %indices_1 = torch.aten.max.dim %812, %int-1, %true : !torch.vtensor<[12,77,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[12,77,1],f32>, !torch.vtensor<[12,77,1],si64>
%813 = torch.aten.sub.Tensor %812, %values_0, %float1.000000e00 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32>, !torch.float -> !torch.vtensor<[12,77,77],f32>
%814 = torch.aten.exp %813 : !torch.vtensor<[12,77,77],f32> -> !torch.vtensor<[12,77,77],f32>
%815 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%816 = torch.aten.sum.dim_IntList %814, %815, %true, %none : !torch.vtensor<[12,77,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[12,77,1],f32>
%817 = torch.aten.div.Tensor %814, %816 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32> -> !torch.vtensor<[12,77,77],f32>
%818 = torch.aten.bmm %817, %804 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32> -> !torch.vtensor<[12,77,64],f32>
%819 = torch.prim.ListConstruct %int1, %int12, %int77, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%820 = torch.aten.view %818, %819 : !torch.vtensor<[12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,64],f32>
%821 = torch.aten.transpose.int %820, %int1, %int2 : !torch.vtensor<[1,12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%822 = torch.aten.clone %821, %int0 : !torch.vtensor<[1,77,12,64],f32>, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%823 = torch.aten.view %822, %775 : !torch.vtensor<[1,77,12,64],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%824 = torch.aten.transpose.int %arg118, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%825 = torch.aten.view %823, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%826 = torch.aten.mm %825, %824 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%827 = torch.aten.mul.Scalar %arg119, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%828 = torch.aten.add.Tensor %827, %826, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%829 = torch.aten.view %828, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%830 = torch.aten.add.Tensor %747, %829, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%831 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%832 = torch.aten.sum.dim_IntList %830, %831, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%833 = torch.aten.div.Scalar %832, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%834 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%835 = torch.aten.broadcast_to %833, %834 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%836 = torch.aten.sub.Tensor %830, %835, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%837 = torch.aten.mul.Tensor %836, %836 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%838 = torch.aten.sum.dim_IntList %837, %831, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%839 = torch.aten.div.Scalar %838, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%840 = torch.aten.add.Scalar %839, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%841 = torch.aten.rsqrt %840 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%842 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%843 = torch.aten.broadcast_to %841, %842 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%844 = torch.aten.mul.Tensor %836, %843 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%845 = torch.aten.mul.Tensor %844, %arg120 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%846 = torch.aten.add.Tensor %845, %arg121, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%847 = torch.aten.transpose.int %arg122, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32>
%848 = torch.aten.view %846, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%849 = torch.aten.mm %848, %847 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[77,3072],f32>
%850 = torch.aten.mul.Scalar %arg123, %int1 : !torch.vtensor<[3072],f32>, !torch.int -> !torch.vtensor<[3072],f32>
%851 = torch.aten.add.Tensor %850, %849, %int1 : !torch.vtensor<[3072],f32>, !torch.vtensor<[77,3072],f32>, !torch.int -> !torch.vtensor<[77,3072],f32>
%852 = torch.prim.ListConstruct %int1, %int77, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%853 = torch.aten.view %851, %852 : !torch.vtensor<[77,3072],f32>, !torch.list<int> -> !torch.vtensor<[1,77,3072],f32>
%854 = torch.aten.mul.Scalar %853, %float1.702000e00 : !torch.vtensor<[1,77,3072],f32>, !torch.float -> !torch.vtensor<[1,77,3072],f32>
%855 = torch.aten.sigmoid %854 : !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%856 = torch.aten.mul.Tensor %853, %855 : !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%857 = torch.aten.transpose.int %arg124, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32>
%858 = torch.prim.ListConstruct %int77, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%859 = torch.aten.view %856, %858 : !torch.vtensor<[1,77,3072],f32>, !torch.list<int> -> !torch.vtensor<[77,3072],f32>
%860 = torch.aten.mm %859, %857 : !torch.vtensor<[77,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[77,768],f32>
%861 = torch.aten.mul.Scalar %arg125, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%862 = torch.aten.add.Tensor %861, %860, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%863 = torch.aten.view %862, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%864 = torch.aten.add.Tensor %830, %863, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%865 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%866 = torch.aten.sum.dim_IntList %864, %865, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%867 = torch.aten.div.Scalar %866, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%868 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%869 = torch.aten.broadcast_to %867, %868 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%870 = torch.aten.sub.Tensor %864, %869, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%871 = torch.aten.mul.Tensor %870, %870 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%872 = torch.aten.sum.dim_IntList %871, %865, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%873 = torch.aten.div.Scalar %872, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%874 = torch.aten.add.Scalar %873, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%875 = torch.aten.rsqrt %874 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%876 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%877 = torch.aten.broadcast_to %875, %876 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%878 = torch.aten.mul.Tensor %870, %877 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%879 = torch.aten.mul.Tensor %878, %arg126 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%880 = torch.aten.add.Tensor %879, %arg127, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%881 = torch.aten.transpose.int %arg128, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%882 = torch.aten.view %880, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%883 = torch.aten.mm %882, %881 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%884 = torch.aten.mul.Scalar %arg129, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%885 = torch.aten.add.Tensor %884, %883, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%886 = torch.aten.view %885, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%887 = torch.aten.mul.Scalar %886, %float1.250000e-01 : !torch.vtensor<[1,77,768],f32>, !torch.float -> !torch.vtensor<[1,77,768],f32>
%888 = torch.aten.transpose.int %arg130, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%889 = torch.aten.view %880, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%890 = torch.aten.mm %889, %888 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%891 = torch.aten.mul.Scalar %arg131, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%892 = torch.aten.add.Tensor %891, %890, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%893 = torch.aten.view %892, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%894 = torch.aten.view %893, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%895 = torch.aten.transpose.int %894, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%896 = torch.aten.clone %895, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%897 = torch.aten.transpose.int %arg132, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%898 = torch.aten.view %880, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%899 = torch.aten.mm %898, %897 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%900 = torch.aten.mul.Scalar %arg133, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%901 = torch.aten.add.Tensor %900, %899, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%902 = torch.aten.view %901, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%903 = torch.aten.view %902, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%904 = torch.aten.transpose.int %903, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%905 = torch.aten.clone %904, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%906 = torch.aten.view %887, %797 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%907 = torch.aten.transpose.int %906, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%908 = torch.aten.clone %907, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%909 = torch.aten.view %908, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%910 = torch.aten.view %896, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%911 = torch.aten.view %905, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%912 = torch.aten.transpose.int %910, %int1, %int2 : !torch.vtensor<[12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[12,64,77],f32>
%913 = torch.aten.bmm %909, %912 : !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32> -> !torch.vtensor<[12,77,77],f32>
%914 = torch.aten.view %913, %807 : !torch.vtensor<[12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,77],f32>
%915 = torch.aten.add.Tensor %914, %809, %int1 : !torch.vtensor<[1,12,77,77],f32>, !torch.vtensor<[1,1,77,77],f32>, !torch.int -> !torch.vtensor<[1,12,77,77],f32>
%916 = torch.aten.view %915, %811 : !torch.vtensor<[1,12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[12,77,77],f32>
%values_2, %indices_3 = torch.aten.max.dim %916, %int-1, %true : !torch.vtensor<[12,77,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[12,77,1],f32>, !torch.vtensor<[12,77,1],si64>
%917 = torch.aten.sub.Tensor %916, %values_2, %float1.000000e00 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32>, !torch.float -> !torch.vtensor<[12,77,77],f32>
%918 = torch.aten.exp %917 : !torch.vtensor<[12,77,77],f32> -> !torch.vtensor<[12,77,77],f32>
%919 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%920 = torch.aten.sum.dim_IntList %918, %919, %true, %none : !torch.vtensor<[12,77,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[12,77,1],f32>
%921 = torch.aten.div.Tensor %918, %920 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32> -> !torch.vtensor<[12,77,77],f32>
%922 = torch.aten.bmm %921, %911 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32> -> !torch.vtensor<[12,77,64],f32>
%923 = torch.aten.view %922, %819 : !torch.vtensor<[12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,64],f32>
%924 = torch.aten.transpose.int %923, %int1, %int2 : !torch.vtensor<[1,12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%925 = torch.aten.clone %924, %int0 : !torch.vtensor<[1,77,12,64],f32>, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%926 = torch.aten.view %925, %775 : !torch.vtensor<[1,77,12,64],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%927 = torch.aten.transpose.int %arg134, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%928 = torch.aten.view %926, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%929 = torch.aten.mm %928, %927 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%930 = torch.aten.mul.Scalar %arg135, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%931 = torch.aten.add.Tensor %930, %929, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%932 = torch.aten.view %931, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%933 = torch.aten.add.Tensor %864, %932, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%934 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%935 = torch.aten.sum.dim_IntList %933, %934, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%936 = torch.aten.div.Scalar %935, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%937 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%938 = torch.aten.broadcast_to %936, %937 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%939 = torch.aten.sub.Tensor %933, %938, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%940 = torch.aten.mul.Tensor %939, %939 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%941 = torch.aten.sum.dim_IntList %940, %934, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%942 = torch.aten.div.Scalar %941, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%943 = torch.aten.add.Scalar %942, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%944 = torch.aten.rsqrt %943 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%945 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%946 = torch.aten.broadcast_to %944, %945 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%947 = torch.aten.mul.Tensor %939, %946 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%948 = torch.aten.mul.Tensor %947, %arg136 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%949 = torch.aten.add.Tensor %948, %arg137, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%950 = torch.aten.transpose.int %arg138, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32>
%951 = torch.aten.view %949, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%952 = torch.aten.mm %951, %950 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[77,3072],f32>
%953 = torch.aten.mul.Scalar %arg139, %int1 : !torch.vtensor<[3072],f32>, !torch.int -> !torch.vtensor<[3072],f32>
%954 = torch.aten.add.Tensor %953, %952, %int1 : !torch.vtensor<[3072],f32>, !torch.vtensor<[77,3072],f32>, !torch.int -> !torch.vtensor<[77,3072],f32>
%955 = torch.aten.view %954, %852 : !torch.vtensor<[77,3072],f32>, !torch.list<int> -> !torch.vtensor<[1,77,3072],f32>
%956 = torch.aten.mul.Scalar %955, %float1.702000e00 : !torch.vtensor<[1,77,3072],f32>, !torch.float -> !torch.vtensor<[1,77,3072],f32>
%957 = torch.aten.sigmoid %956 : !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%958 = torch.aten.mul.Tensor %955, %957 : !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%959 = torch.aten.transpose.int %arg140, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32>
%960 = torch.aten.view %958, %858 : !torch.vtensor<[1,77,3072],f32>, !torch.list<int> -> !torch.vtensor<[77,3072],f32>
%961 = torch.aten.mm %960, %959 : !torch.vtensor<[77,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[77,768],f32>
%962 = torch.aten.mul.Scalar %arg141, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%963 = torch.aten.add.Tensor %962, %961, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%964 = torch.aten.view %963, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%965 = torch.aten.add.Tensor %933, %964, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%966 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%967 = torch.aten.sum.dim_IntList %965, %966, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%968 = torch.aten.div.Scalar %967, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%969 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%970 = torch.aten.broadcast_to %968, %969 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%971 = torch.aten.sub.Tensor %965, %970, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%972 = torch.aten.mul.Tensor %971, %971 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%973 = torch.aten.sum.dim_IntList %972, %966, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%974 = torch.aten.div.Scalar %973, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%975 = torch.aten.add.Scalar %974, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%976 = torch.aten.rsqrt %975 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%977 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%978 = torch.aten.broadcast_to %976, %977 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%979 = torch.aten.mul.Tensor %971, %978 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%980 = torch.aten.mul.Tensor %979, %arg142 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%981 = torch.aten.add.Tensor %980, %arg143, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%982 = torch.aten.transpose.int %arg144, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%983 = torch.aten.view %981, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%984 = torch.aten.mm %983, %982 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%985 = torch.aten.mul.Scalar %arg145, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%986 = torch.aten.add.Tensor %985, %984, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%987 = torch.aten.view %986, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%988 = torch.aten.mul.Scalar %987, %float1.250000e-01 : !torch.vtensor<[1,77,768],f32>, !torch.float -> !torch.vtensor<[1,77,768],f32>
%989 = torch.aten.transpose.int %arg146, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%990 = torch.aten.view %981, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%991 = torch.aten.mm %990, %989 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%992 = torch.aten.mul.Scalar %arg147, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%993 = torch.aten.add.Tensor %992, %991, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%994 = torch.aten.view %993, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%995 = torch.aten.view %994, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%996 = torch.aten.transpose.int %995, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%997 = torch.aten.clone %996, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%998 = torch.aten.transpose.int %arg148, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%999 = torch.aten.view %981, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1000 = torch.aten.mm %999, %998 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1001 = torch.aten.mul.Scalar %arg149, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1002 = torch.aten.add.Tensor %1001, %1000, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1003 = torch.aten.view %1002, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1004 = torch.aten.view %1003, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1005 = torch.aten.transpose.int %1004, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1006 = torch.aten.clone %1005, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1007 = torch.aten.view %988, %797 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1008 = torch.aten.transpose.int %1007, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1009 = torch.aten.clone %1008, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1010 = torch.aten.view %1009, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1011 = torch.aten.view %997, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1012 = torch.aten.view %1006, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1013 = torch.aten.transpose.int %1011, %int1, %int2 : !torch.vtensor<[12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[12,64,77],f32>
%1014 = torch.aten.bmm %1010, %1013 : !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1015 = torch.aten.view %1014, %807 : !torch.vtensor<[12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,77],f32>
%1016 = torch.aten.add.Tensor %1015, %809, %int1 : !torch.vtensor<[1,12,77,77],f32>, !torch.vtensor<[1,1,77,77],f32>, !torch.int -> !torch.vtensor<[1,12,77,77],f32>
%1017 = torch.aten.view %1016, %811 : !torch.vtensor<[1,12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[12,77,77],f32>
%values_4, %indices_5 = torch.aten.max.dim %1017, %int-1, %true : !torch.vtensor<[12,77,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[12,77,1],f32>, !torch.vtensor<[12,77,1],si64>
%1018 = torch.aten.sub.Tensor %1017, %values_4, %float1.000000e00 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32>, !torch.float -> !torch.vtensor<[12,77,77],f32>
%1019 = torch.aten.exp %1018 : !torch.vtensor<[12,77,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1020 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1021 = torch.aten.sum.dim_IntList %1019, %1020, %true, %none : !torch.vtensor<[12,77,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[12,77,1],f32>
%1022 = torch.aten.div.Tensor %1019, %1021 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32> -> !torch.vtensor<[12,77,77],f32>
%1023 = torch.aten.bmm %1022, %1012 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32> -> !torch.vtensor<[12,77,64],f32>
%1024 = torch.aten.view %1023, %819 : !torch.vtensor<[12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,64],f32>
%1025 = torch.aten.transpose.int %1024, %int1, %int2 : !torch.vtensor<[1,12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1026 = torch.aten.clone %1025, %int0 : !torch.vtensor<[1,77,12,64],f32>, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1027 = torch.aten.view %1026, %775 : !torch.vtensor<[1,77,12,64],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1028 = torch.aten.transpose.int %arg150, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1029 = torch.aten.view %1027, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1030 = torch.aten.mm %1029, %1028 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1031 = torch.aten.mul.Scalar %arg151, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1032 = torch.aten.add.Tensor %1031, %1030, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1033 = torch.aten.view %1032, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1034 = torch.aten.add.Tensor %965, %1033, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1035 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1036 = torch.aten.sum.dim_IntList %1034, %1035, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1037 = torch.aten.div.Scalar %1036, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1038 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1039 = torch.aten.broadcast_to %1037, %1038 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1040 = torch.aten.sub.Tensor %1034, %1039, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1041 = torch.aten.mul.Tensor %1040, %1040 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1042 = torch.aten.sum.dim_IntList %1041, %1035, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1043 = torch.aten.div.Scalar %1042, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1044 = torch.aten.add.Scalar %1043, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1045 = torch.aten.rsqrt %1044 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1046 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1047 = torch.aten.broadcast_to %1045, %1046 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1048 = torch.aten.mul.Tensor %1040, %1047 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1049 = torch.aten.mul.Tensor %1048, %arg152 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1050 = torch.aten.add.Tensor %1049, %arg153, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1051 = torch.aten.transpose.int %arg154, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32>
%1052 = torch.aten.view %1050, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1053 = torch.aten.mm %1052, %1051 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[77,3072],f32>
%1054 = torch.aten.mul.Scalar %arg155, %int1 : !torch.vtensor<[3072],f32>, !torch.int -> !torch.vtensor<[3072],f32>
%1055 = torch.aten.add.Tensor %1054, %1053, %int1 : !torch.vtensor<[3072],f32>, !torch.vtensor<[77,3072],f32>, !torch.int -> !torch.vtensor<[77,3072],f32>
%1056 = torch.aten.view %1055, %852 : !torch.vtensor<[77,3072],f32>, !torch.list<int> -> !torch.vtensor<[1,77,3072],f32>
%1057 = torch.aten.mul.Scalar %1056, %float1.702000e00 : !torch.vtensor<[1,77,3072],f32>, !torch.float -> !torch.vtensor<[1,77,3072],f32>
%1058 = torch.aten.sigmoid %1057 : !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1059 = torch.aten.mul.Tensor %1056, %1058 : !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1060 = torch.aten.transpose.int %arg156, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32>
%1061 = torch.aten.view %1059, %858 : !torch.vtensor<[1,77,3072],f32>, !torch.list<int> -> !torch.vtensor<[77,3072],f32>
%1062 = torch.aten.mm %1061, %1060 : !torch.vtensor<[77,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[77,768],f32>
%1063 = torch.aten.mul.Scalar %arg157, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1064 = torch.aten.add.Tensor %1063, %1062, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1065 = torch.aten.view %1064, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1066 = torch.aten.add.Tensor %1034, %1065, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1067 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1068 = torch.aten.sum.dim_IntList %1066, %1067, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1069 = torch.aten.div.Scalar %1068, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1070 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1071 = torch.aten.broadcast_to %1069, %1070 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1072 = torch.aten.sub.Tensor %1066, %1071, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1073 = torch.aten.mul.Tensor %1072, %1072 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1074 = torch.aten.sum.dim_IntList %1073, %1067, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1075 = torch.aten.div.Scalar %1074, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1076 = torch.aten.add.Scalar %1075, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1077 = torch.aten.rsqrt %1076 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1078 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1079 = torch.aten.broadcast_to %1077, %1078 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1080 = torch.aten.mul.Tensor %1072, %1079 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1081 = torch.aten.mul.Tensor %1080, %arg158 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1082 = torch.aten.add.Tensor %1081, %arg159, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1083 = torch.aten.transpose.int %arg160, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1084 = torch.aten.view %1082, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1085 = torch.aten.mm %1084, %1083 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1086 = torch.aten.mul.Scalar %arg161, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1087 = torch.aten.add.Tensor %1086, %1085, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1088 = torch.aten.view %1087, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1089 = torch.aten.mul.Scalar %1088, %float1.250000e-01 : !torch.vtensor<[1,77,768],f32>, !torch.float -> !torch.vtensor<[1,77,768],f32>
%1090 = torch.aten.transpose.int %arg162, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1091 = torch.aten.view %1082, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1092 = torch.aten.mm %1091, %1090 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1093 = torch.aten.mul.Scalar %arg163, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1094 = torch.aten.add.Tensor %1093, %1092, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1095 = torch.aten.view %1094, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1096 = torch.aten.view %1095, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1097 = torch.aten.transpose.int %1096, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1098 = torch.aten.clone %1097, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1099 = torch.aten.transpose.int %arg164, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1100 = torch.aten.view %1082, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1101 = torch.aten.mm %1100, %1099 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1102 = torch.aten.mul.Scalar %arg165, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1103 = torch.aten.add.Tensor %1102, %1101, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1104 = torch.aten.view %1103, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1105 = torch.aten.view %1104, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1106 = torch.aten.transpose.int %1105, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1107 = torch.aten.clone %1106, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1108 = torch.aten.view %1089, %797 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1109 = torch.aten.transpose.int %1108, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1110 = torch.aten.clone %1109, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1111 = torch.aten.view %1110, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1112 = torch.aten.view %1098, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1113 = torch.aten.view %1107, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1114 = torch.aten.transpose.int %1112, %int1, %int2 : !torch.vtensor<[12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[12,64,77],f32>
%1115 = torch.aten.bmm %1111, %1114 : !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1116 = torch.aten.view %1115, %807 : !torch.vtensor<[12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,77],f32>
%1117 = torch.aten.add.Tensor %1116, %809, %int1 : !torch.vtensor<[1,12,77,77],f32>, !torch.vtensor<[1,1,77,77],f32>, !torch.int -> !torch.vtensor<[1,12,77,77],f32>
%1118 = torch.aten.view %1117, %811 : !torch.vtensor<[1,12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[12,77,77],f32>
%values_6, %indices_7 = torch.aten.max.dim %1118, %int-1, %true : !torch.vtensor<[12,77,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[12,77,1],f32>, !torch.vtensor<[12,77,1],si64>
%1119 = torch.aten.sub.Tensor %1118, %values_6, %float1.000000e00 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32>, !torch.float -> !torch.vtensor<[12,77,77],f32>
%1120 = torch.aten.exp %1119 : !torch.vtensor<[12,77,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1121 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1122 = torch.aten.sum.dim_IntList %1120, %1121, %true, %none : !torch.vtensor<[12,77,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[12,77,1],f32>
%1123 = torch.aten.div.Tensor %1120, %1122 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32> -> !torch.vtensor<[12,77,77],f32>
%1124 = torch.aten.bmm %1123, %1113 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32> -> !torch.vtensor<[12,77,64],f32>
%1125 = torch.aten.view %1124, %819 : !torch.vtensor<[12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,64],f32>
%1126 = torch.aten.transpose.int %1125, %int1, %int2 : !torch.vtensor<[1,12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1127 = torch.aten.clone %1126, %int0 : !torch.vtensor<[1,77,12,64],f32>, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1128 = torch.aten.view %1127, %775 : !torch.vtensor<[1,77,12,64],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1129 = torch.aten.transpose.int %arg166, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1130 = torch.aten.view %1128, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1131 = torch.aten.mm %1130, %1129 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1132 = torch.aten.mul.Scalar %arg167, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1133 = torch.aten.add.Tensor %1132, %1131, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1134 = torch.aten.view %1133, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1135 = torch.aten.add.Tensor %1066, %1134, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1136 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1137 = torch.aten.sum.dim_IntList %1135, %1136, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1138 = torch.aten.div.Scalar %1137, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1139 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1140 = torch.aten.broadcast_to %1138, %1139 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1141 = torch.aten.sub.Tensor %1135, %1140, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1142 = torch.aten.mul.Tensor %1141, %1141 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1143 = torch.aten.sum.dim_IntList %1142, %1136, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1144 = torch.aten.div.Scalar %1143, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1145 = torch.aten.add.Scalar %1144, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1146 = torch.aten.rsqrt %1145 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1147 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1148 = torch.aten.broadcast_to %1146, %1147 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1149 = torch.aten.mul.Tensor %1141, %1148 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1150 = torch.aten.mul.Tensor %1149, %arg168 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1151 = torch.aten.add.Tensor %1150, %arg169, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1152 = torch.aten.transpose.int %arg170, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32>
%1153 = torch.aten.view %1151, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1154 = torch.aten.mm %1153, %1152 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[77,3072],f32>
%1155 = torch.aten.mul.Scalar %arg171, %int1 : !torch.vtensor<[3072],f32>, !torch.int -> !torch.vtensor<[3072],f32>
%1156 = torch.aten.add.Tensor %1155, %1154, %int1 : !torch.vtensor<[3072],f32>, !torch.vtensor<[77,3072],f32>, !torch.int -> !torch.vtensor<[77,3072],f32>
%1157 = torch.aten.view %1156, %852 : !torch.vtensor<[77,3072],f32>, !torch.list<int> -> !torch.vtensor<[1,77,3072],f32>
%1158 = torch.aten.mul.Scalar %1157, %float1.702000e00 : !torch.vtensor<[1,77,3072],f32>, !torch.float -> !torch.vtensor<[1,77,3072],f32>
%1159 = torch.aten.sigmoid %1158 : !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1160 = torch.aten.mul.Tensor %1157, %1159 : !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1161 = torch.aten.transpose.int %arg172, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32>
%1162 = torch.aten.view %1160, %858 : !torch.vtensor<[1,77,3072],f32>, !torch.list<int> -> !torch.vtensor<[77,3072],f32>
%1163 = torch.aten.mm %1162, %1161 : !torch.vtensor<[77,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[77,768],f32>
%1164 = torch.aten.mul.Scalar %arg173, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1165 = torch.aten.add.Tensor %1164, %1163, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1166 = torch.aten.view %1165, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1167 = torch.aten.add.Tensor %1135, %1166, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1168 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1169 = torch.aten.sum.dim_IntList %1167, %1168, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1170 = torch.aten.div.Scalar %1169, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1171 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1172 = torch.aten.broadcast_to %1170, %1171 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1173 = torch.aten.sub.Tensor %1167, %1172, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1174 = torch.aten.mul.Tensor %1173, %1173 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1175 = torch.aten.sum.dim_IntList %1174, %1168, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1176 = torch.aten.div.Scalar %1175, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1177 = torch.aten.add.Scalar %1176, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1178 = torch.aten.rsqrt %1177 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1179 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1180 = torch.aten.broadcast_to %1178, %1179 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1181 = torch.aten.mul.Tensor %1173, %1180 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1182 = torch.aten.mul.Tensor %1181, %arg174 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1183 = torch.aten.add.Tensor %1182, %arg175, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1184 = torch.aten.transpose.int %arg176, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1185 = torch.aten.view %1183, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1186 = torch.aten.mm %1185, %1184 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1187 = torch.aten.mul.Scalar %arg177, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1188 = torch.aten.add.Tensor %1187, %1186, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1189 = torch.aten.view %1188, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1190 = torch.aten.mul.Scalar %1189, %float1.250000e-01 : !torch.vtensor<[1,77,768],f32>, !torch.float -> !torch.vtensor<[1,77,768],f32>
%1191 = torch.aten.transpose.int %arg178, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1192 = torch.aten.view %1183, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1193 = torch.aten.mm %1192, %1191 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1194 = torch.aten.mul.Scalar %arg179, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1195 = torch.aten.add.Tensor %1194, %1193, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1196 = torch.aten.view %1195, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1197 = torch.aten.view %1196, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1198 = torch.aten.transpose.int %1197, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1199 = torch.aten.clone %1198, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1200 = torch.aten.transpose.int %arg180, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1201 = torch.aten.view %1183, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1202 = torch.aten.mm %1201, %1200 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1203 = torch.aten.mul.Scalar %arg181, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1204 = torch.aten.add.Tensor %1203, %1202, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1205 = torch.aten.view %1204, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1206 = torch.aten.view %1205, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1207 = torch.aten.transpose.int %1206, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1208 = torch.aten.clone %1207, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1209 = torch.aten.view %1190, %797 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1210 = torch.aten.transpose.int %1209, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1211 = torch.aten.clone %1210, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1212 = torch.aten.view %1211, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1213 = torch.aten.view %1199, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1214 = torch.aten.view %1208, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1215 = torch.aten.transpose.int %1213, %int1, %int2 : !torch.vtensor<[12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[12,64,77],f32>
%1216 = torch.aten.bmm %1212, %1215 : !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1217 = torch.aten.view %1216, %807 : !torch.vtensor<[12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,77],f32>
%1218 = torch.aten.add.Tensor %1217, %809, %int1 : !torch.vtensor<[1,12,77,77],f32>, !torch.vtensor<[1,1,77,77],f32>, !torch.int -> !torch.vtensor<[1,12,77,77],f32>
%1219 = torch.aten.view %1218, %811 : !torch.vtensor<[1,12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[12,77,77],f32>
%values_8, %indices_9 = torch.aten.max.dim %1219, %int-1, %true : !torch.vtensor<[12,77,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[12,77,1],f32>, !torch.vtensor<[12,77,1],si64>
%1220 = torch.aten.sub.Tensor %1219, %values_8, %float1.000000e00 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32>, !torch.float -> !torch.vtensor<[12,77,77],f32>
%1221 = torch.aten.exp %1220 : !torch.vtensor<[12,77,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1222 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1223 = torch.aten.sum.dim_IntList %1221, %1222, %true, %none : !torch.vtensor<[12,77,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[12,77,1],f32>
%1224 = torch.aten.div.Tensor %1221, %1223 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32> -> !torch.vtensor<[12,77,77],f32>
%1225 = torch.aten.bmm %1224, %1214 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32> -> !torch.vtensor<[12,77,64],f32>
%1226 = torch.aten.view %1225, %819 : !torch.vtensor<[12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,64],f32>
%1227 = torch.aten.transpose.int %1226, %int1, %int2 : !torch.vtensor<[1,12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1228 = torch.aten.clone %1227, %int0 : !torch.vtensor<[1,77,12,64],f32>, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1229 = torch.aten.view %1228, %775 : !torch.vtensor<[1,77,12,64],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1230 = torch.aten.transpose.int %arg182, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1231 = torch.aten.view %1229, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1232 = torch.aten.mm %1231, %1230 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1233 = torch.aten.mul.Scalar %arg183, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1234 = torch.aten.add.Tensor %1233, %1232, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1235 = torch.aten.view %1234, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1236 = torch.aten.add.Tensor %1167, %1235, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1237 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1238 = torch.aten.sum.dim_IntList %1236, %1237, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1239 = torch.aten.div.Scalar %1238, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1240 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1241 = torch.aten.broadcast_to %1239, %1240 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1242 = torch.aten.sub.Tensor %1236, %1241, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1243 = torch.aten.mul.Tensor %1242, %1242 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1244 = torch.aten.sum.dim_IntList %1243, %1237, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1245 = torch.aten.div.Scalar %1244, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1246 = torch.aten.add.Scalar %1245, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1247 = torch.aten.rsqrt %1246 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1248 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1249 = torch.aten.broadcast_to %1247, %1248 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1250 = torch.aten.mul.Tensor %1242, %1249 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1251 = torch.aten.mul.Tensor %1250, %arg184 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1252 = torch.aten.add.Tensor %1251, %arg185, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1253 = torch.aten.transpose.int %arg186, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32>
%1254 = torch.aten.view %1252, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1255 = torch.aten.mm %1254, %1253 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[77,3072],f32>
%1256 = torch.aten.mul.Scalar %arg187, %int1 : !torch.vtensor<[3072],f32>, !torch.int -> !torch.vtensor<[3072],f32>
%1257 = torch.aten.add.Tensor %1256, %1255, %int1 : !torch.vtensor<[3072],f32>, !torch.vtensor<[77,3072],f32>, !torch.int -> !torch.vtensor<[77,3072],f32>
%1258 = torch.aten.view %1257, %852 : !torch.vtensor<[77,3072],f32>, !torch.list<int> -> !torch.vtensor<[1,77,3072],f32>
%1259 = torch.aten.mul.Scalar %1258, %float1.702000e00 : !torch.vtensor<[1,77,3072],f32>, !torch.float -> !torch.vtensor<[1,77,3072],f32>
%1260 = torch.aten.sigmoid %1259 : !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1261 = torch.aten.mul.Tensor %1258, %1260 : !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1262 = torch.aten.transpose.int %arg188, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32>
%1263 = torch.aten.view %1261, %858 : !torch.vtensor<[1,77,3072],f32>, !torch.list<int> -> !torch.vtensor<[77,3072],f32>
%1264 = torch.aten.mm %1263, %1262 : !torch.vtensor<[77,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[77,768],f32>
%1265 = torch.aten.mul.Scalar %arg189, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1266 = torch.aten.add.Tensor %1265, %1264, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1267 = torch.aten.view %1266, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1268 = torch.aten.add.Tensor %1236, %1267, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1269 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1270 = torch.aten.sum.dim_IntList %1268, %1269, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1271 = torch.aten.div.Scalar %1270, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1272 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1273 = torch.aten.broadcast_to %1271, %1272 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1274 = torch.aten.sub.Tensor %1268, %1273, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1275 = torch.aten.mul.Tensor %1274, %1274 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1276 = torch.aten.sum.dim_IntList %1275, %1269, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1277 = torch.aten.div.Scalar %1276, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1278 = torch.aten.add.Scalar %1277, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1279 = torch.aten.rsqrt %1278 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1280 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1281 = torch.aten.broadcast_to %1279, %1280 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1282 = torch.aten.mul.Tensor %1274, %1281 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1283 = torch.aten.mul.Tensor %1282, %arg190 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1284 = torch.aten.add.Tensor %1283, %arg191, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1285 = torch.aten.transpose.int %arg192, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1286 = torch.aten.view %1284, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1287 = torch.aten.mm %1286, %1285 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1288 = torch.aten.mul.Scalar %arg193, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1289 = torch.aten.add.Tensor %1288, %1287, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1290 = torch.aten.view %1289, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1291 = torch.aten.mul.Scalar %1290, %float1.250000e-01 : !torch.vtensor<[1,77,768],f32>, !torch.float -> !torch.vtensor<[1,77,768],f32>
%1292 = torch.aten.transpose.int %arg194, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1293 = torch.aten.view %1284, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1294 = torch.aten.mm %1293, %1292 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1295 = torch.aten.mul.Scalar %arg195, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1296 = torch.aten.add.Tensor %1295, %1294, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1297 = torch.aten.view %1296, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1298 = torch.aten.view %1297, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1299 = torch.aten.transpose.int %1298, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1300 = torch.aten.clone %1299, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1301 = torch.aten.transpose.int %arg196, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1302 = torch.aten.view %1284, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1303 = torch.aten.mm %1302, %1301 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1304 = torch.aten.mul.Scalar %arg197, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1305 = torch.aten.add.Tensor %1304, %1303, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1306 = torch.aten.view %1305, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1307 = torch.aten.view %1306, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1308 = torch.aten.transpose.int %1307, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1309 = torch.aten.clone %1308, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1310 = torch.aten.view %1291, %797 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1311 = torch.aten.transpose.int %1310, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1312 = torch.aten.clone %1311, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1313 = torch.aten.view %1312, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1314 = torch.aten.view %1300, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1315 = torch.aten.view %1309, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1316 = torch.aten.transpose.int %1314, %int1, %int2 : !torch.vtensor<[12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[12,64,77],f32>
%1317 = torch.aten.bmm %1313, %1316 : !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1318 = torch.aten.view %1317, %807 : !torch.vtensor<[12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,77],f32>
%1319 = torch.aten.add.Tensor %1318, %809, %int1 : !torch.vtensor<[1,12,77,77],f32>, !torch.vtensor<[1,1,77,77],f32>, !torch.int -> !torch.vtensor<[1,12,77,77],f32>
%1320 = torch.aten.view %1319, %811 : !torch.vtensor<[1,12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[12,77,77],f32>
%values_10, %indices_11 = torch.aten.max.dim %1320, %int-1, %true : !torch.vtensor<[12,77,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[12,77,1],f32>, !torch.vtensor<[12,77,1],si64>
%1321 = torch.aten.sub.Tensor %1320, %values_10, %float1.000000e00 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32>, !torch.float -> !torch.vtensor<[12,77,77],f32>
%1322 = torch.aten.exp %1321 : !torch.vtensor<[12,77,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1323 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1324 = torch.aten.sum.dim_IntList %1322, %1323, %true, %none : !torch.vtensor<[12,77,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[12,77,1],f32>
%1325 = torch.aten.div.Tensor %1322, %1324 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32> -> !torch.vtensor<[12,77,77],f32>
%1326 = torch.aten.bmm %1325, %1315 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32> -> !torch.vtensor<[12,77,64],f32>
%1327 = torch.aten.view %1326, %819 : !torch.vtensor<[12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,64],f32>
%1328 = torch.aten.transpose.int %1327, %int1, %int2 : !torch.vtensor<[1,12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1329 = torch.aten.clone %1328, %int0 : !torch.vtensor<[1,77,12,64],f32>, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1330 = torch.aten.view %1329, %775 : !torch.vtensor<[1,77,12,64],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1331 = torch.aten.transpose.int %arg198, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1332 = torch.aten.view %1330, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1333 = torch.aten.mm %1332, %1331 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1334 = torch.aten.mul.Scalar %arg199, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1335 = torch.aten.add.Tensor %1334, %1333, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1336 = torch.aten.view %1335, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1337 = torch.aten.add.Tensor %1268, %1336, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1338 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1339 = torch.aten.sum.dim_IntList %1337, %1338, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1340 = torch.aten.div.Scalar %1339, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1341 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1342 = torch.aten.broadcast_to %1340, %1341 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1343 = torch.aten.sub.Tensor %1337, %1342, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1344 = torch.aten.mul.Tensor %1343, %1343 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1345 = torch.aten.sum.dim_IntList %1344, %1338, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1346 = torch.aten.div.Scalar %1345, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1347 = torch.aten.add.Scalar %1346, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1348 = torch.aten.rsqrt %1347 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1349 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1350 = torch.aten.broadcast_to %1348, %1349 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1351 = torch.aten.mul.Tensor %1343, %1350 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1352 = torch.aten.mul.Tensor %1351, %arg200 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1353 = torch.aten.add.Tensor %1352, %arg201, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1354 = torch.aten.transpose.int %arg202, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32>
%1355 = torch.aten.view %1353, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1356 = torch.aten.mm %1355, %1354 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[77,3072],f32>
%1357 = torch.aten.mul.Scalar %arg203, %int1 : !torch.vtensor<[3072],f32>, !torch.int -> !torch.vtensor<[3072],f32>
%1358 = torch.aten.add.Tensor %1357, %1356, %int1 : !torch.vtensor<[3072],f32>, !torch.vtensor<[77,3072],f32>, !torch.int -> !torch.vtensor<[77,3072],f32>
%1359 = torch.aten.view %1358, %852 : !torch.vtensor<[77,3072],f32>, !torch.list<int> -> !torch.vtensor<[1,77,3072],f32>
%1360 = torch.aten.mul.Scalar %1359, %float1.702000e00 : !torch.vtensor<[1,77,3072],f32>, !torch.float -> !torch.vtensor<[1,77,3072],f32>
%1361 = torch.aten.sigmoid %1360 : !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1362 = torch.aten.mul.Tensor %1359, %1361 : !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1363 = torch.aten.transpose.int %arg204, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32>
%1364 = torch.aten.view %1362, %858 : !torch.vtensor<[1,77,3072],f32>, !torch.list<int> -> !torch.vtensor<[77,3072],f32>
%1365 = torch.aten.mm %1364, %1363 : !torch.vtensor<[77,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[77,768],f32>
%1366 = torch.aten.mul.Scalar %arg205, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1367 = torch.aten.add.Tensor %1366, %1365, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1368 = torch.aten.view %1367, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1369 = torch.aten.add.Tensor %1337, %1368, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1370 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1371 = torch.aten.sum.dim_IntList %1369, %1370, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1372 = torch.aten.div.Scalar %1371, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1373 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1374 = torch.aten.broadcast_to %1372, %1373 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1375 = torch.aten.sub.Tensor %1369, %1374, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1376 = torch.aten.mul.Tensor %1375, %1375 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1377 = torch.aten.sum.dim_IntList %1376, %1370, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1378 = torch.aten.div.Scalar %1377, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1379 = torch.aten.add.Scalar %1378, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1380 = torch.aten.rsqrt %1379 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1381 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1382 = torch.aten.broadcast_to %1380, %1381 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1383 = torch.aten.mul.Tensor %1375, %1382 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1384 = torch.aten.mul.Tensor %1383, %arg206 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1385 = torch.aten.add.Tensor %1384, %arg207, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1386 = torch.aten.transpose.int %arg208, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1387 = torch.aten.view %1385, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1388 = torch.aten.mm %1387, %1386 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1389 = torch.aten.mul.Scalar %arg209, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1390 = torch.aten.add.Tensor %1389, %1388, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1391 = torch.aten.view %1390, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1392 = torch.aten.mul.Scalar %1391, %float1.250000e-01 : !torch.vtensor<[1,77,768],f32>, !torch.float -> !torch.vtensor<[1,77,768],f32>
%1393 = torch.aten.transpose.int %arg210, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1394 = torch.aten.view %1385, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1395 = torch.aten.mm %1394, %1393 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1396 = torch.aten.mul.Scalar %arg211, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1397 = torch.aten.add.Tensor %1396, %1395, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1398 = torch.aten.view %1397, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1399 = torch.aten.view %1398, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1400 = torch.aten.transpose.int %1399, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1401 = torch.aten.clone %1400, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1402 = torch.aten.transpose.int %arg212, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1403 = torch.aten.view %1385, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1404 = torch.aten.mm %1403, %1402 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1405 = torch.aten.mul.Scalar %arg213, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1406 = torch.aten.add.Tensor %1405, %1404, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1407 = torch.aten.view %1406, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1408 = torch.aten.view %1407, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1409 = torch.aten.transpose.int %1408, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1410 = torch.aten.clone %1409, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1411 = torch.aten.view %1392, %797 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1412 = torch.aten.transpose.int %1411, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1413 = torch.aten.clone %1412, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1414 = torch.aten.view %1413, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1415 = torch.aten.view %1401, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1416 = torch.aten.view %1410, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1417 = torch.aten.transpose.int %1415, %int1, %int2 : !torch.vtensor<[12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[12,64,77],f32>
%1418 = torch.aten.bmm %1414, %1417 : !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1419 = torch.aten.view %1418, %807 : !torch.vtensor<[12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,77],f32>
%1420 = torch.aten.add.Tensor %1419, %809, %int1 : !torch.vtensor<[1,12,77,77],f32>, !torch.vtensor<[1,1,77,77],f32>, !torch.int -> !torch.vtensor<[1,12,77,77],f32>
%1421 = torch.aten.view %1420, %811 : !torch.vtensor<[1,12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[12,77,77],f32>
%values_12, %indices_13 = torch.aten.max.dim %1421, %int-1, %true : !torch.vtensor<[12,77,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[12,77,1],f32>, !torch.vtensor<[12,77,1],si64>
%1422 = torch.aten.sub.Tensor %1421, %values_12, %float1.000000e00 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32>, !torch.float -> !torch.vtensor<[12,77,77],f32>
%1423 = torch.aten.exp %1422 : !torch.vtensor<[12,77,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1424 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1425 = torch.aten.sum.dim_IntList %1423, %1424, %true, %none : !torch.vtensor<[12,77,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[12,77,1],f32>
%1426 = torch.aten.div.Tensor %1423, %1425 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32> -> !torch.vtensor<[12,77,77],f32>
%1427 = torch.aten.bmm %1426, %1416 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32> -> !torch.vtensor<[12,77,64],f32>
%1428 = torch.aten.view %1427, %819 : !torch.vtensor<[12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,64],f32>
%1429 = torch.aten.transpose.int %1428, %int1, %int2 : !torch.vtensor<[1,12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1430 = torch.aten.clone %1429, %int0 : !torch.vtensor<[1,77,12,64],f32>, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1431 = torch.aten.view %1430, %775 : !torch.vtensor<[1,77,12,64],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1432 = torch.aten.transpose.int %arg214, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1433 = torch.aten.view %1431, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1434 = torch.aten.mm %1433, %1432 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1435 = torch.aten.mul.Scalar %arg215, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1436 = torch.aten.add.Tensor %1435, %1434, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1437 = torch.aten.view %1436, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1438 = torch.aten.add.Tensor %1369, %1437, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1439 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1440 = torch.aten.sum.dim_IntList %1438, %1439, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1441 = torch.aten.div.Scalar %1440, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1442 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1443 = torch.aten.broadcast_to %1441, %1442 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1444 = torch.aten.sub.Tensor %1438, %1443, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1445 = torch.aten.mul.Tensor %1444, %1444 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1446 = torch.aten.sum.dim_IntList %1445, %1439, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1447 = torch.aten.div.Scalar %1446, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1448 = torch.aten.add.Scalar %1447, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1449 = torch.aten.rsqrt %1448 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1450 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1451 = torch.aten.broadcast_to %1449, %1450 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1452 = torch.aten.mul.Tensor %1444, %1451 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1453 = torch.aten.mul.Tensor %1452, %arg216 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1454 = torch.aten.add.Tensor %1453, %arg217, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1455 = torch.aten.transpose.int %arg218, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32>
%1456 = torch.aten.view %1454, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1457 = torch.aten.mm %1456, %1455 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[77,3072],f32>
%1458 = torch.aten.mul.Scalar %arg219, %int1 : !torch.vtensor<[3072],f32>, !torch.int -> !torch.vtensor<[3072],f32>
%1459 = torch.aten.add.Tensor %1458, %1457, %int1 : !torch.vtensor<[3072],f32>, !torch.vtensor<[77,3072],f32>, !torch.int -> !torch.vtensor<[77,3072],f32>
%1460 = torch.aten.view %1459, %852 : !torch.vtensor<[77,3072],f32>, !torch.list<int> -> !torch.vtensor<[1,77,3072],f32>
%1461 = torch.aten.mul.Scalar %1460, %float1.702000e00 : !torch.vtensor<[1,77,3072],f32>, !torch.float -> !torch.vtensor<[1,77,3072],f32>
%1462 = torch.aten.sigmoid %1461 : !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1463 = torch.aten.mul.Tensor %1460, %1462 : !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1464 = torch.aten.transpose.int %arg220, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32>
%1465 = torch.aten.view %1463, %858 : !torch.vtensor<[1,77,3072],f32>, !torch.list<int> -> !torch.vtensor<[77,3072],f32>
%1466 = torch.aten.mm %1465, %1464 : !torch.vtensor<[77,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[77,768],f32>
%1467 = torch.aten.mul.Scalar %arg221, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1468 = torch.aten.add.Tensor %1467, %1466, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1469 = torch.aten.view %1468, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1470 = torch.aten.add.Tensor %1438, %1469, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1471 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1472 = torch.aten.sum.dim_IntList %1470, %1471, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1473 = torch.aten.div.Scalar %1472, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1474 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1475 = torch.aten.broadcast_to %1473, %1474 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1476 = torch.aten.sub.Tensor %1470, %1475, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1477 = torch.aten.mul.Tensor %1476, %1476 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1478 = torch.aten.sum.dim_IntList %1477, %1471, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1479 = torch.aten.div.Scalar %1478, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1480 = torch.aten.add.Scalar %1479, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1481 = torch.aten.rsqrt %1480 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1482 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1483 = torch.aten.broadcast_to %1481, %1482 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1484 = torch.aten.mul.Tensor %1476, %1483 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1485 = torch.aten.mul.Tensor %1484, %arg222 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1486 = torch.aten.add.Tensor %1485, %arg223, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1487 = torch.aten.transpose.int %arg224, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1488 = torch.aten.view %1486, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1489 = torch.aten.mm %1488, %1487 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1490 = torch.aten.mul.Scalar %arg225, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1491 = torch.aten.add.Tensor %1490, %1489, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1492 = torch.aten.view %1491, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1493 = torch.aten.mul.Scalar %1492, %float1.250000e-01 : !torch.vtensor<[1,77,768],f32>, !torch.float -> !torch.vtensor<[1,77,768],f32>
%1494 = torch.aten.transpose.int %arg226, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1495 = torch.aten.view %1486, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1496 = torch.aten.mm %1495, %1494 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1497 = torch.aten.mul.Scalar %arg227, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1498 = torch.aten.add.Tensor %1497, %1496, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1499 = torch.aten.view %1498, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1500 = torch.aten.view %1499, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1501 = torch.aten.transpose.int %1500, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1502 = torch.aten.clone %1501, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1503 = torch.aten.transpose.int %arg228, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1504 = torch.aten.view %1486, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1505 = torch.aten.mm %1504, %1503 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1506 = torch.aten.mul.Scalar %arg229, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1507 = torch.aten.add.Tensor %1506, %1505, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1508 = torch.aten.view %1507, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1509 = torch.aten.view %1508, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1510 = torch.aten.transpose.int %1509, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1511 = torch.aten.clone %1510, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1512 = torch.aten.view %1493, %797 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1513 = torch.aten.transpose.int %1512, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1514 = torch.aten.clone %1513, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1515 = torch.aten.view %1514, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1516 = torch.aten.view %1502, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1517 = torch.aten.view %1511, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1518 = torch.aten.transpose.int %1516, %int1, %int2 : !torch.vtensor<[12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[12,64,77],f32>
%1519 = torch.aten.bmm %1515, %1518 : !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1520 = torch.aten.view %1519, %807 : !torch.vtensor<[12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,77],f32>
%1521 = torch.aten.add.Tensor %1520, %809, %int1 : !torch.vtensor<[1,12,77,77],f32>, !torch.vtensor<[1,1,77,77],f32>, !torch.int -> !torch.vtensor<[1,12,77,77],f32>
%1522 = torch.aten.view %1521, %811 : !torch.vtensor<[1,12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[12,77,77],f32>
%values_14, %indices_15 = torch.aten.max.dim %1522, %int-1, %true : !torch.vtensor<[12,77,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[12,77,1],f32>, !torch.vtensor<[12,77,1],si64>
%1523 = torch.aten.sub.Tensor %1522, %values_14, %float1.000000e00 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32>, !torch.float -> !torch.vtensor<[12,77,77],f32>
%1524 = torch.aten.exp %1523 : !torch.vtensor<[12,77,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1525 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1526 = torch.aten.sum.dim_IntList %1524, %1525, %true, %none : !torch.vtensor<[12,77,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[12,77,1],f32>
%1527 = torch.aten.div.Tensor %1524, %1526 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32> -> !torch.vtensor<[12,77,77],f32>
%1528 = torch.aten.bmm %1527, %1517 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32> -> !torch.vtensor<[12,77,64],f32>
%1529 = torch.aten.view %1528, %819 : !torch.vtensor<[12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,64],f32>
%1530 = torch.aten.transpose.int %1529, %int1, %int2 : !torch.vtensor<[1,12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1531 = torch.aten.clone %1530, %int0 : !torch.vtensor<[1,77,12,64],f32>, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1532 = torch.aten.view %1531, %775 : !torch.vtensor<[1,77,12,64],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1533 = torch.aten.transpose.int %arg230, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1534 = torch.aten.view %1532, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1535 = torch.aten.mm %1534, %1533 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1536 = torch.aten.mul.Scalar %arg231, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1537 = torch.aten.add.Tensor %1536, %1535, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1538 = torch.aten.view %1537, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1539 = torch.aten.add.Tensor %1470, %1538, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1540 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1541 = torch.aten.sum.dim_IntList %1539, %1540, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1542 = torch.aten.div.Scalar %1541, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1543 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1544 = torch.aten.broadcast_to %1542, %1543 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1545 = torch.aten.sub.Tensor %1539, %1544, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1546 = torch.aten.mul.Tensor %1545, %1545 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1547 = torch.aten.sum.dim_IntList %1546, %1540, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1548 = torch.aten.div.Scalar %1547, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1549 = torch.aten.add.Scalar %1548, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1550 = torch.aten.rsqrt %1549 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1551 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1552 = torch.aten.broadcast_to %1550, %1551 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1553 = torch.aten.mul.Tensor %1545, %1552 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1554 = torch.aten.mul.Tensor %1553, %arg232 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1555 = torch.aten.add.Tensor %1554, %arg233, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1556 = torch.aten.transpose.int %arg234, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32>
%1557 = torch.aten.view %1555, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1558 = torch.aten.mm %1557, %1556 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[77,3072],f32>
%1559 = torch.aten.mul.Scalar %arg235, %int1 : !torch.vtensor<[3072],f32>, !torch.int -> !torch.vtensor<[3072],f32>
%1560 = torch.aten.add.Tensor %1559, %1558, %int1 : !torch.vtensor<[3072],f32>, !torch.vtensor<[77,3072],f32>, !torch.int -> !torch.vtensor<[77,3072],f32>
%1561 = torch.aten.view %1560, %852 : !torch.vtensor<[77,3072],f32>, !torch.list<int> -> !torch.vtensor<[1,77,3072],f32>
%1562 = torch.aten.mul.Scalar %1561, %float1.702000e00 : !torch.vtensor<[1,77,3072],f32>, !torch.float -> !torch.vtensor<[1,77,3072],f32>
%1563 = torch.aten.sigmoid %1562 : !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1564 = torch.aten.mul.Tensor %1561, %1563 : !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1565 = torch.aten.transpose.int %arg236, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32>
%1566 = torch.aten.view %1564, %858 : !torch.vtensor<[1,77,3072],f32>, !torch.list<int> -> !torch.vtensor<[77,3072],f32>
%1567 = torch.aten.mm %1566, %1565 : !torch.vtensor<[77,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[77,768],f32>
%1568 = torch.aten.mul.Scalar %arg237, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1569 = torch.aten.add.Tensor %1568, %1567, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1570 = torch.aten.view %1569, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1571 = torch.aten.add.Tensor %1539, %1570, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1572 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1573 = torch.aten.sum.dim_IntList %1571, %1572, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1574 = torch.aten.div.Scalar %1573, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1575 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1576 = torch.aten.broadcast_to %1574, %1575 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1577 = torch.aten.sub.Tensor %1571, %1576, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1578 = torch.aten.mul.Tensor %1577, %1577 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1579 = torch.aten.sum.dim_IntList %1578, %1572, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1580 = torch.aten.div.Scalar %1579, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1581 = torch.aten.add.Scalar %1580, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1582 = torch.aten.rsqrt %1581 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1583 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1584 = torch.aten.broadcast_to %1582, %1583 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1585 = torch.aten.mul.Tensor %1577, %1584 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1586 = torch.aten.mul.Tensor %1585, %arg238 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1587 = torch.aten.add.Tensor %1586, %arg239, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1588 = torch.aten.transpose.int %arg240, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1589 = torch.aten.view %1587, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1590 = torch.aten.mm %1589, %1588 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1591 = torch.aten.mul.Scalar %arg241, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1592 = torch.aten.add.Tensor %1591, %1590, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1593 = torch.aten.view %1592, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1594 = torch.aten.mul.Scalar %1593, %float1.250000e-01 : !torch.vtensor<[1,77,768],f32>, !torch.float -> !torch.vtensor<[1,77,768],f32>
%1595 = torch.aten.transpose.int %arg242, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1596 = torch.aten.view %1587, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1597 = torch.aten.mm %1596, %1595 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1598 = torch.aten.mul.Scalar %arg243, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1599 = torch.aten.add.Tensor %1598, %1597, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1600 = torch.aten.view %1599, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1601 = torch.aten.view %1600, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1602 = torch.aten.transpose.int %1601, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1603 = torch.aten.clone %1602, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1604 = torch.aten.transpose.int %arg244, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1605 = torch.aten.view %1587, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1606 = torch.aten.mm %1605, %1604 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1607 = torch.aten.mul.Scalar %arg245, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1608 = torch.aten.add.Tensor %1607, %1606, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1609 = torch.aten.view %1608, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1610 = torch.aten.view %1609, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1611 = torch.aten.transpose.int %1610, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1612 = torch.aten.clone %1611, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1613 = torch.aten.view %1594, %797 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1614 = torch.aten.transpose.int %1613, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1615 = torch.aten.clone %1614, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1616 = torch.aten.view %1615, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1617 = torch.aten.view %1603, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1618 = torch.aten.view %1612, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1619 = torch.aten.transpose.int %1617, %int1, %int2 : !torch.vtensor<[12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[12,64,77],f32>
%1620 = torch.aten.bmm %1616, %1619 : !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1621 = torch.aten.view %1620, %807 : !torch.vtensor<[12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,77],f32>
%1622 = torch.aten.add.Tensor %1621, %809, %int1 : !torch.vtensor<[1,12,77,77],f32>, !torch.vtensor<[1,1,77,77],f32>, !torch.int -> !torch.vtensor<[1,12,77,77],f32>
%1623 = torch.aten.view %1622, %811 : !torch.vtensor<[1,12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[12,77,77],f32>
%values_16, %indices_17 = torch.aten.max.dim %1623, %int-1, %true : !torch.vtensor<[12,77,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[12,77,1],f32>, !torch.vtensor<[12,77,1],si64>
%1624 = torch.aten.sub.Tensor %1623, %values_16, %float1.000000e00 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32>, !torch.float -> !torch.vtensor<[12,77,77],f32>
%1625 = torch.aten.exp %1624 : !torch.vtensor<[12,77,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1626 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1627 = torch.aten.sum.dim_IntList %1625, %1626, %true, %none : !torch.vtensor<[12,77,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[12,77,1],f32>
%1628 = torch.aten.div.Tensor %1625, %1627 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32> -> !torch.vtensor<[12,77,77],f32>
%1629 = torch.aten.bmm %1628, %1618 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32> -> !torch.vtensor<[12,77,64],f32>
%1630 = torch.aten.view %1629, %819 : !torch.vtensor<[12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,64],f32>
%1631 = torch.aten.transpose.int %1630, %int1, %int2 : !torch.vtensor<[1,12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1632 = torch.aten.clone %1631, %int0 : !torch.vtensor<[1,77,12,64],f32>, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1633 = torch.aten.view %1632, %775 : !torch.vtensor<[1,77,12,64],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1634 = torch.aten.transpose.int %arg246, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1635 = torch.aten.view %1633, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1636 = torch.aten.mm %1635, %1634 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1637 = torch.aten.mul.Scalar %arg247, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1638 = torch.aten.add.Tensor %1637, %1636, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1639 = torch.aten.view %1638, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1640 = torch.aten.add.Tensor %1571, %1639, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1641 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1642 = torch.aten.sum.dim_IntList %1640, %1641, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1643 = torch.aten.div.Scalar %1642, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1644 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1645 = torch.aten.broadcast_to %1643, %1644 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1646 = torch.aten.sub.Tensor %1640, %1645, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1647 = torch.aten.mul.Tensor %1646, %1646 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1648 = torch.aten.sum.dim_IntList %1647, %1641, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1649 = torch.aten.div.Scalar %1648, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1650 = torch.aten.add.Scalar %1649, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1651 = torch.aten.rsqrt %1650 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1652 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1653 = torch.aten.broadcast_to %1651, %1652 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1654 = torch.aten.mul.Tensor %1646, %1653 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1655 = torch.aten.mul.Tensor %1654, %arg248 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1656 = torch.aten.add.Tensor %1655, %arg249, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1657 = torch.aten.transpose.int %arg250, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32>
%1658 = torch.aten.view %1656, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1659 = torch.aten.mm %1658, %1657 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[77,3072],f32>
%1660 = torch.aten.mul.Scalar %arg251, %int1 : !torch.vtensor<[3072],f32>, !torch.int -> !torch.vtensor<[3072],f32>
%1661 = torch.aten.add.Tensor %1660, %1659, %int1 : !torch.vtensor<[3072],f32>, !torch.vtensor<[77,3072],f32>, !torch.int -> !torch.vtensor<[77,3072],f32>
%1662 = torch.aten.view %1661, %852 : !torch.vtensor<[77,3072],f32>, !torch.list<int> -> !torch.vtensor<[1,77,3072],f32>
%1663 = torch.aten.mul.Scalar %1662, %float1.702000e00 : !torch.vtensor<[1,77,3072],f32>, !torch.float -> !torch.vtensor<[1,77,3072],f32>
%1664 = torch.aten.sigmoid %1663 : !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1665 = torch.aten.mul.Tensor %1662, %1664 : !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1666 = torch.aten.transpose.int %arg252, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32>
%1667 = torch.aten.view %1665, %858 : !torch.vtensor<[1,77,3072],f32>, !torch.list<int> -> !torch.vtensor<[77,3072],f32>
%1668 = torch.aten.mm %1667, %1666 : !torch.vtensor<[77,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[77,768],f32>
%1669 = torch.aten.mul.Scalar %arg253, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1670 = torch.aten.add.Tensor %1669, %1668, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1671 = torch.aten.view %1670, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1672 = torch.aten.add.Tensor %1640, %1671, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1673 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1674 = torch.aten.sum.dim_IntList %1672, %1673, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1675 = torch.aten.div.Scalar %1674, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1676 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1677 = torch.aten.broadcast_to %1675, %1676 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1678 = torch.aten.sub.Tensor %1672, %1677, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1679 = torch.aten.mul.Tensor %1678, %1678 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1680 = torch.aten.sum.dim_IntList %1679, %1673, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1681 = torch.aten.div.Scalar %1680, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1682 = torch.aten.add.Scalar %1681, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1683 = torch.aten.rsqrt %1682 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1684 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1685 = torch.aten.broadcast_to %1683, %1684 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1686 = torch.aten.mul.Tensor %1678, %1685 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1687 = torch.aten.mul.Tensor %1686, %arg254 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1688 = torch.aten.add.Tensor %1687, %arg255, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1689 = torch.aten.transpose.int %arg256, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1690 = torch.aten.view %1688, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1691 = torch.aten.mm %1690, %1689 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1692 = torch.aten.mul.Scalar %arg257, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1693 = torch.aten.add.Tensor %1692, %1691, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1694 = torch.aten.view %1693, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1695 = torch.aten.mul.Scalar %1694, %float1.250000e-01 : !torch.vtensor<[1,77,768],f32>, !torch.float -> !torch.vtensor<[1,77,768],f32>
%1696 = torch.aten.transpose.int %arg258, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1697 = torch.aten.view %1688, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1698 = torch.aten.mm %1697, %1696 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1699 = torch.aten.mul.Scalar %arg259, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1700 = torch.aten.add.Tensor %1699, %1698, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1701 = torch.aten.view %1700, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1702 = torch.aten.view %1701, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1703 = torch.aten.transpose.int %1702, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1704 = torch.aten.clone %1703, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1705 = torch.aten.transpose.int %arg260, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1706 = torch.aten.view %1688, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1707 = torch.aten.mm %1706, %1705 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1708 = torch.aten.mul.Scalar %arg261, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1709 = torch.aten.add.Tensor %1708, %1707, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1710 = torch.aten.view %1709, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1711 = torch.aten.view %1710, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1712 = torch.aten.transpose.int %1711, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1713 = torch.aten.clone %1712, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1714 = torch.aten.view %1695, %797 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1715 = torch.aten.transpose.int %1714, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1716 = torch.aten.clone %1715, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1717 = torch.aten.view %1716, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1718 = torch.aten.view %1704, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1719 = torch.aten.view %1713, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1720 = torch.aten.transpose.int %1718, %int1, %int2 : !torch.vtensor<[12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[12,64,77],f32>
%1721 = torch.aten.bmm %1717, %1720 : !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1722 = torch.aten.view %1721, %807 : !torch.vtensor<[12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,77],f32>
%1723 = torch.aten.add.Tensor %1722, %809, %int1 : !torch.vtensor<[1,12,77,77],f32>, !torch.vtensor<[1,1,77,77],f32>, !torch.int -> !torch.vtensor<[1,12,77,77],f32>
%1724 = torch.aten.view %1723, %811 : !torch.vtensor<[1,12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[12,77,77],f32>
%values_18, %indices_19 = torch.aten.max.dim %1724, %int-1, %true : !torch.vtensor<[12,77,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[12,77,1],f32>, !torch.vtensor<[12,77,1],si64>
%1725 = torch.aten.sub.Tensor %1724, %values_18, %float1.000000e00 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32>, !torch.float -> !torch.vtensor<[12,77,77],f32>
%1726 = torch.aten.exp %1725 : !torch.vtensor<[12,77,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1727 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1728 = torch.aten.sum.dim_IntList %1726, %1727, %true, %none : !torch.vtensor<[12,77,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[12,77,1],f32>
%1729 = torch.aten.div.Tensor %1726, %1728 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32> -> !torch.vtensor<[12,77,77],f32>
%1730 = torch.aten.bmm %1729, %1719 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32> -> !torch.vtensor<[12,77,64],f32>
%1731 = torch.aten.view %1730, %819 : !torch.vtensor<[12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,64],f32>
%1732 = torch.aten.transpose.int %1731, %int1, %int2 : !torch.vtensor<[1,12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1733 = torch.aten.clone %1732, %int0 : !torch.vtensor<[1,77,12,64],f32>, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1734 = torch.aten.view %1733, %775 : !torch.vtensor<[1,77,12,64],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1735 = torch.aten.transpose.int %arg262, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1736 = torch.aten.view %1734, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1737 = torch.aten.mm %1736, %1735 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1738 = torch.aten.mul.Scalar %arg263, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1739 = torch.aten.add.Tensor %1738, %1737, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1740 = torch.aten.view %1739, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1741 = torch.aten.add.Tensor %1672, %1740, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1742 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1743 = torch.aten.sum.dim_IntList %1741, %1742, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1744 = torch.aten.div.Scalar %1743, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1745 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1746 = torch.aten.broadcast_to %1744, %1745 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1747 = torch.aten.sub.Tensor %1741, %1746, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1748 = torch.aten.mul.Tensor %1747, %1747 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1749 = torch.aten.sum.dim_IntList %1748, %1742, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1750 = torch.aten.div.Scalar %1749, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1751 = torch.aten.add.Scalar %1750, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1752 = torch.aten.rsqrt %1751 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1753 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1754 = torch.aten.broadcast_to %1752, %1753 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1755 = torch.aten.mul.Tensor %1747, %1754 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1756 = torch.aten.mul.Tensor %1755, %arg264 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1757 = torch.aten.add.Tensor %1756, %arg265, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1758 = torch.aten.transpose.int %arg266, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32>
%1759 = torch.aten.view %1757, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1760 = torch.aten.mm %1759, %1758 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[77,3072],f32>
%1761 = torch.aten.mul.Scalar %arg267, %int1 : !torch.vtensor<[3072],f32>, !torch.int -> !torch.vtensor<[3072],f32>
%1762 = torch.aten.add.Tensor %1761, %1760, %int1 : !torch.vtensor<[3072],f32>, !torch.vtensor<[77,3072],f32>, !torch.int -> !torch.vtensor<[77,3072],f32>
%1763 = torch.aten.view %1762, %852 : !torch.vtensor<[77,3072],f32>, !torch.list<int> -> !torch.vtensor<[1,77,3072],f32>
%1764 = torch.aten.mul.Scalar %1763, %float1.702000e00 : !torch.vtensor<[1,77,3072],f32>, !torch.float -> !torch.vtensor<[1,77,3072],f32>
%1765 = torch.aten.sigmoid %1764 : !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1766 = torch.aten.mul.Tensor %1763, %1765 : !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1767 = torch.aten.transpose.int %arg268, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32>
%1768 = torch.aten.view %1766, %858 : !torch.vtensor<[1,77,3072],f32>, !torch.list<int> -> !torch.vtensor<[77,3072],f32>
%1769 = torch.aten.mm %1768, %1767 : !torch.vtensor<[77,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[77,768],f32>
%1770 = torch.aten.mul.Scalar %arg269, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1771 = torch.aten.add.Tensor %1770, %1769, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1772 = torch.aten.view %1771, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1773 = torch.aten.add.Tensor %1741, %1772, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1774 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1775 = torch.aten.sum.dim_IntList %1773, %1774, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1776 = torch.aten.div.Scalar %1775, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1777 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1778 = torch.aten.broadcast_to %1776, %1777 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1779 = torch.aten.sub.Tensor %1773, %1778, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1780 = torch.aten.mul.Tensor %1779, %1779 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1781 = torch.aten.sum.dim_IntList %1780, %1774, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1782 = torch.aten.div.Scalar %1781, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1783 = torch.aten.add.Scalar %1782, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1784 = torch.aten.rsqrt %1783 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1785 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1786 = torch.aten.broadcast_to %1784, %1785 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1787 = torch.aten.mul.Tensor %1779, %1786 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1788 = torch.aten.mul.Tensor %1787, %arg270 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1789 = torch.aten.add.Tensor %1788, %arg271, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1790 = torch.aten.transpose.int %arg272, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1791 = torch.aten.view %1789, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1792 = torch.aten.mm %1791, %1790 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1793 = torch.aten.mul.Scalar %arg273, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1794 = torch.aten.add.Tensor %1793, %1792, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1795 = torch.aten.view %1794, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1796 = torch.aten.mul.Scalar %1795, %float1.250000e-01 : !torch.vtensor<[1,77,768],f32>, !torch.float -> !torch.vtensor<[1,77,768],f32>
%1797 = torch.aten.transpose.int %arg274, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1798 = torch.aten.view %1789, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1799 = torch.aten.mm %1798, %1797 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1800 = torch.aten.mul.Scalar %arg275, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1801 = torch.aten.add.Tensor %1800, %1799, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1802 = torch.aten.view %1801, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1803 = torch.aten.view %1802, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1804 = torch.aten.transpose.int %1803, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1805 = torch.aten.clone %1804, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1806 = torch.aten.transpose.int %arg276, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1807 = torch.aten.view %1789, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1808 = torch.aten.mm %1807, %1806 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1809 = torch.aten.mul.Scalar %arg277, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1810 = torch.aten.add.Tensor %1809, %1808, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1811 = torch.aten.view %1810, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1812 = torch.aten.view %1811, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1813 = torch.aten.transpose.int %1812, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1814 = torch.aten.clone %1813, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1815 = torch.aten.view %1796, %797 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1816 = torch.aten.transpose.int %1815, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1817 = torch.aten.clone %1816, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1818 = torch.aten.view %1817, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1819 = torch.aten.view %1805, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1820 = torch.aten.view %1814, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1821 = torch.aten.transpose.int %1819, %int1, %int2 : !torch.vtensor<[12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[12,64,77],f32>
%1822 = torch.aten.bmm %1818, %1821 : !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1823 = torch.aten.view %1822, %807 : !torch.vtensor<[12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,77],f32>
%1824 = torch.aten.add.Tensor %1823, %809, %int1 : !torch.vtensor<[1,12,77,77],f32>, !torch.vtensor<[1,1,77,77],f32>, !torch.int -> !torch.vtensor<[1,12,77,77],f32>
%1825 = torch.aten.view %1824, %811 : !torch.vtensor<[1,12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[12,77,77],f32>
%values_20, %indices_21 = torch.aten.max.dim %1825, %int-1, %true : !torch.vtensor<[12,77,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[12,77,1],f32>, !torch.vtensor<[12,77,1],si64>
%1826 = torch.aten.sub.Tensor %1825, %values_20, %float1.000000e00 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32>, !torch.float -> !torch.vtensor<[12,77,77],f32>
%1827 = torch.aten.exp %1826 : !torch.vtensor<[12,77,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1828 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1829 = torch.aten.sum.dim_IntList %1827, %1828, %true, %none : !torch.vtensor<[12,77,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[12,77,1],f32>
%1830 = torch.aten.div.Tensor %1827, %1829 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32> -> !torch.vtensor<[12,77,77],f32>
%1831 = torch.aten.bmm %1830, %1820 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32> -> !torch.vtensor<[12,77,64],f32>
%1832 = torch.aten.view %1831, %819 : !torch.vtensor<[12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,64],f32>
%1833 = torch.aten.transpose.int %1832, %int1, %int2 : !torch.vtensor<[1,12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1834 = torch.aten.clone %1833, %int0 : !torch.vtensor<[1,77,12,64],f32>, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1835 = torch.aten.view %1834, %775 : !torch.vtensor<[1,77,12,64],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1836 = torch.aten.transpose.int %arg278, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1837 = torch.aten.view %1835, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1838 = torch.aten.mm %1837, %1836 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1839 = torch.aten.mul.Scalar %arg279, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1840 = torch.aten.add.Tensor %1839, %1838, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1841 = torch.aten.view %1840, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1842 = torch.aten.add.Tensor %1773, %1841, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1843 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1844 = torch.aten.sum.dim_IntList %1842, %1843, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1845 = torch.aten.div.Scalar %1844, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1846 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1847 = torch.aten.broadcast_to %1845, %1846 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1848 = torch.aten.sub.Tensor %1842, %1847, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1849 = torch.aten.mul.Tensor %1848, %1848 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1850 = torch.aten.sum.dim_IntList %1849, %1843, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1851 = torch.aten.div.Scalar %1850, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1852 = torch.aten.add.Scalar %1851, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1853 = torch.aten.rsqrt %1852 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1854 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1855 = torch.aten.broadcast_to %1853, %1854 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1856 = torch.aten.mul.Tensor %1848, %1855 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1857 = torch.aten.mul.Tensor %1856, %arg280 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1858 = torch.aten.add.Tensor %1857, %arg281, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1859 = torch.aten.transpose.int %arg282, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32>
%1860 = torch.aten.view %1858, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1861 = torch.aten.mm %1860, %1859 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[77,3072],f32>
%1862 = torch.aten.mul.Scalar %arg283, %int1 : !torch.vtensor<[3072],f32>, !torch.int -> !torch.vtensor<[3072],f32>
%1863 = torch.aten.add.Tensor %1862, %1861, %int1 : !torch.vtensor<[3072],f32>, !torch.vtensor<[77,3072],f32>, !torch.int -> !torch.vtensor<[77,3072],f32>
%1864 = torch.aten.view %1863, %852 : !torch.vtensor<[77,3072],f32>, !torch.list<int> -> !torch.vtensor<[1,77,3072],f32>
%1865 = torch.aten.mul.Scalar %1864, %float1.702000e00 : !torch.vtensor<[1,77,3072],f32>, !torch.float -> !torch.vtensor<[1,77,3072],f32>
%1866 = torch.aten.sigmoid %1865 : !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1867 = torch.aten.mul.Tensor %1864, %1866 : !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1868 = torch.aten.transpose.int %arg284, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32>
%1869 = torch.aten.view %1867, %858 : !torch.vtensor<[1,77,3072],f32>, !torch.list<int> -> !torch.vtensor<[77,3072],f32>
%1870 = torch.aten.mm %1869, %1868 : !torch.vtensor<[77,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[77,768],f32>
%1871 = torch.aten.mul.Scalar %arg285, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1872 = torch.aten.add.Tensor %1871, %1870, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1873 = torch.aten.view %1872, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1874 = torch.aten.add.Tensor %1842, %1873, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1875 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1876 = torch.aten.sum.dim_IntList %1874, %1875, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1877 = torch.aten.div.Scalar %1876, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1878 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1879 = torch.aten.broadcast_to %1877, %1878 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1880 = torch.aten.sub.Tensor %1874, %1879, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1881 = torch.aten.mul.Tensor %1880, %1880 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1882 = torch.aten.sum.dim_IntList %1881, %1875, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1883 = torch.aten.div.Scalar %1882, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1884 = torch.aten.add.Scalar %1883, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1885 = torch.aten.rsqrt %1884 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1886 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1887 = torch.aten.broadcast_to %1885, %1886 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1888 = torch.aten.mul.Tensor %1880, %1887 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1889 = torch.aten.mul.Tensor %1888, %arg286 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1890 = torch.aten.add.Tensor %1889, %arg287, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1891 = torch.aten.transpose.int %arg288, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1892 = torch.aten.view %1890, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1893 = torch.aten.mm %1892, %1891 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1894 = torch.aten.mul.Scalar %arg289, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1895 = torch.aten.add.Tensor %1894, %1893, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1896 = torch.aten.view %1895, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1897 = torch.aten.mul.Scalar %1896, %float1.250000e-01 : !torch.vtensor<[1,77,768],f32>, !torch.float -> !torch.vtensor<[1,77,768],f32>
%1898 = torch.aten.transpose.int %arg290, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1899 = torch.aten.view %1890, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1900 = torch.aten.mm %1899, %1898 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1901 = torch.aten.mul.Scalar %arg291, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1902 = torch.aten.add.Tensor %1901, %1900, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1903 = torch.aten.view %1902, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1904 = torch.aten.view %1903, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1905 = torch.aten.transpose.int %1904, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1906 = torch.aten.clone %1905, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1907 = torch.aten.transpose.int %arg292, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1908 = torch.aten.view %1890, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1909 = torch.aten.mm %1908, %1907 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1910 = torch.aten.mul.Scalar %arg293, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1911 = torch.aten.add.Tensor %1910, %1909, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1912 = torch.aten.view %1911, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1913 = torch.aten.view %1912, %784 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1914 = torch.aten.transpose.int %1913, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1915 = torch.aten.clone %1914, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1916 = torch.aten.view %1897, %797 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,12,64],f32>
%1917 = torch.aten.transpose.int %1916, %int1, %int2 : !torch.vtensor<[1,77,12,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1918 = torch.aten.clone %1917, %int0 : !torch.vtensor<[1,12,77,64],f32>, !torch.int -> !torch.vtensor<[1,12,77,64],f32>
%1919 = torch.aten.view %1918, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1920 = torch.aten.view %1906, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1921 = torch.aten.view %1915, %801 : !torch.vtensor<[1,12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[12,77,64],f32>
%1922 = torch.aten.transpose.int %1920, %int1, %int2 : !torch.vtensor<[12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[12,64,77],f32>
%1923 = torch.aten.bmm %1919, %1922 : !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1924 = torch.aten.view %1923, %807 : !torch.vtensor<[12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,77],f32>
%1925 = torch.aten.add.Tensor %1924, %809, %int1 : !torch.vtensor<[1,12,77,77],f32>, !torch.vtensor<[1,1,77,77],f32>, !torch.int -> !torch.vtensor<[1,12,77,77],f32>
%1926 = torch.aten.view %1925, %811 : !torch.vtensor<[1,12,77,77],f32>, !torch.list<int> -> !torch.vtensor<[12,77,77],f32>
%values_22, %indices_23 = torch.aten.max.dim %1926, %int-1, %true : !torch.vtensor<[12,77,77],f32>, !torch.int, !torch.bool -> !torch.vtensor<[12,77,1],f32>, !torch.vtensor<[12,77,1],si64>
%1927 = torch.aten.sub.Tensor %1926, %values_22, %float1.000000e00 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32>, !torch.float -> !torch.vtensor<[12,77,77],f32>
%1928 = torch.aten.exp %1927 : !torch.vtensor<[12,77,77],f32> -> !torch.vtensor<[12,77,77],f32>
%1929 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1930 = torch.aten.sum.dim_IntList %1928, %1929, %true, %none : !torch.vtensor<[12,77,77],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[12,77,1],f32>
%1931 = torch.aten.div.Tensor %1928, %1930 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,1],f32> -> !torch.vtensor<[12,77,77],f32>
%1932 = torch.aten.bmm %1931, %1921 : !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32> -> !torch.vtensor<[12,77,64],f32>
%1933 = torch.aten.view %1932, %819 : !torch.vtensor<[12,77,64],f32>, !torch.list<int> -> !torch.vtensor<[1,12,77,64],f32>
%1934 = torch.aten.transpose.int %1933, %int1, %int2 : !torch.vtensor<[1,12,77,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1935 = torch.aten.clone %1934, %int0 : !torch.vtensor<[1,77,12,64],f32>, !torch.int -> !torch.vtensor<[1,77,12,64],f32>
%1936 = torch.aten.view %1935, %775 : !torch.vtensor<[1,77,12,64],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1937 = torch.aten.transpose.int %arg294, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32>
%1938 = torch.aten.view %1936, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1939 = torch.aten.mm %1938, %1937 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[77,768],f32>
%1940 = torch.aten.mul.Scalar %arg295, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1941 = torch.aten.add.Tensor %1940, %1939, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1942 = torch.aten.view %1941, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1943 = torch.aten.add.Tensor %1874, %1942, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1944 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1945 = torch.aten.sum.dim_IntList %1943, %1944, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1946 = torch.aten.div.Scalar %1945, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1947 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1948 = torch.aten.broadcast_to %1946, %1947 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1949 = torch.aten.sub.Tensor %1943, %1948, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1950 = torch.aten.mul.Tensor %1949, %1949 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1951 = torch.aten.sum.dim_IntList %1950, %1944, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1952 = torch.aten.div.Scalar %1951, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1953 = torch.aten.add.Scalar %1952, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1954 = torch.aten.rsqrt %1953 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1955 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1956 = torch.aten.broadcast_to %1954, %1955 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1957 = torch.aten.mul.Tensor %1949, %1956 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1958 = torch.aten.mul.Tensor %1957, %arg296 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1959 = torch.aten.add.Tensor %1958, %arg297, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1960 = torch.aten.transpose.int %arg298, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32>
%1961 = torch.aten.view %1959, %770 : !torch.vtensor<[1,77,768],f32>, !torch.list<int> -> !torch.vtensor<[77,768],f32>
%1962 = torch.aten.mm %1961, %1960 : !torch.vtensor<[77,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[77,3072],f32>
%1963 = torch.aten.mul.Scalar %arg299, %int1 : !torch.vtensor<[3072],f32>, !torch.int -> !torch.vtensor<[3072],f32>
%1964 = torch.aten.add.Tensor %1963, %1962, %int1 : !torch.vtensor<[3072],f32>, !torch.vtensor<[77,3072],f32>, !torch.int -> !torch.vtensor<[77,3072],f32>
%1965 = torch.aten.view %1964, %852 : !torch.vtensor<[77,3072],f32>, !torch.list<int> -> !torch.vtensor<[1,77,3072],f32>
%1966 = torch.aten.mul.Scalar %1965, %float1.702000e00 : !torch.vtensor<[1,77,3072],f32>, !torch.float -> !torch.vtensor<[1,77,3072],f32>
%1967 = torch.aten.sigmoid %1966 : !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1968 = torch.aten.mul.Tensor %1965, %1967 : !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32> -> !torch.vtensor<[1,77,3072],f32>
%1969 = torch.aten.transpose.int %arg300, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32>
%1970 = torch.aten.view %1968, %858 : !torch.vtensor<[1,77,3072],f32>, !torch.list<int> -> !torch.vtensor<[77,3072],f32>
%1971 = torch.aten.mm %1970, %1969 : !torch.vtensor<[77,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[77,768],f32>
%1972 = torch.aten.mul.Scalar %arg301, %int1 : !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[768],f32>
%1973 = torch.aten.add.Tensor %1972, %1971, %int1 : !torch.vtensor<[768],f32>, !torch.vtensor<[77,768],f32>, !torch.int -> !torch.vtensor<[77,768],f32>
%1974 = torch.aten.view %1973, %775 : !torch.vtensor<[77,768],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1975 = torch.aten.add.Tensor %1943, %1974, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1976 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%1977 = torch.aten.sum.dim_IntList %1975, %1976, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1978 = torch.aten.div.Scalar %1977, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1979 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1980 = torch.aten.broadcast_to %1978, %1979 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1981 = torch.aten.sub.Tensor %1975, %1980, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
%1982 = torch.aten.mul.Tensor %1981, %1981 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1983 = torch.aten.sum.dim_IntList %1982, %1976, %true, %none : !torch.vtensor<[1,77,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,77,1],f32>
%1984 = torch.aten.div.Scalar %1983, %int768 : !torch.vtensor<[1,77,1],f32>, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1985 = torch.aten.add.Scalar %1984, %float1.000000e-05, %int1 : !torch.vtensor<[1,77,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,77,1],f32>
%1986 = torch.aten.rsqrt %1985 : !torch.vtensor<[1,77,1],f32> -> !torch.vtensor<[1,77,1],f32>
%1987 = torch.prim.ListConstruct %int1, %int77, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1988 = torch.aten.broadcast_to %1986, %1987 : !torch.vtensor<[1,77,1],f32>, !torch.list<int> -> !torch.vtensor<[1,77,768],f32>
%1989 = torch.aten.mul.Tensor %1981, %1988 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32> -> !torch.vtensor<[1,77,768],f32>
%1990 = torch.aten.mul.Tensor %1989, %arg302 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[1,77,768],f32>
%1991 = torch.aten.add.Tensor %1990, %arg303, %int1 : !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[1,77,768],f32>
return %742, %727, %1991, %723, %arg307, %1595, %1675, %arg206, %1662, %1268, %1487, %1313, %989, %1441, %1965, %1689, %1184, %1099, %1618, %1539, %1518, %1114, %1262, %1159, %1664, %1919, %1090, %1028, %1619, %1683, %1325, %arg216, %1588, %1113, %1111, %arg222, %1574, %1386, %1123, %1705, %1845, %1503, %864, %1380, %1527, %arg254, %744, %968, %1628, %1285, %1152, %1058, %1301, %976, %1556, %1372, %1616, %1432, %arg136, %1449, %1604, %1874, %1651, %arg232, %1056, %1533, %arg110, %1462, %1464, %1438, %arg238, %817, %1012, %1069, %1167, %1077, %1582, %1135, %1034, %1129, %arg120, %1640, %1717, %1773, %1634, %1060, %1741, %1161, %1460, %1797, %1363, %1022, %1426, %1417, %1561, %1666, %arg248, %1013, %arg126, %1455, %1494, %847, %1359, %1818, %1830, %arg264, %arg200, %955, %1696, %1517, %805, %857, %1864, %1316, %1542, %arg142, %1331, %1853, %1767, %1037, %804, %1820, %1720, %1821, %arg270, %1729, %959, %1898, %1340, %1719, %1470, %1045, %921, %1214, %1369, %1236, %1083, %855, %1348, %1315, %1051, %909, %1200, %1790, %1138, %911, %arg152, %1414, %1885, %1859, %1361, %1946, %1178, %1571, %1868, %1239, %1891, %1354, %875, %1866, %933, %arg168, %1550, %1765, %755, %944, %1657, %1836, %998, %1672, %763, %957, %1643, %1230, %1481, %1763, %arg158, %1752, %778, %965, %arg280, %927, %1967, %830, %950, %arg286, %853, %arg174, %1758, %1943, %1146, %888, %1402, %1907, %1744, %1563, %1515, %1969, %1247, %881, %824, %1170, %1776, %1735, %1010, %1565, %1292, %1784, %912, %833, %1258, %1806, %982, %arg296, %1224, %1253, %1473, %1271, %1393, %arg184, %769, %1986, %1215, %arg190, %1921, %867, %1931, %arg302, %1191, %1978, %1337, %1922, %1937, %1960, %1877, %841, %747, %1212, %1842, %1279, %1416, %1066, %1260, %936, %897, %802, %1157, %788, %1954, %1975 : !torch.vtensor<[1,4,64,64],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,4,64,64],f32>, !torch.vtensor<[1000],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, 
!torch.vtensor<[768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77],si64>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, 
!torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,768],f32>, 
!torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[1,77,1],f32>, 
!torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,77],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[12,64,77],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,768],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[12,77,64],f32>, !torch.vtensor<[1,77,3072],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,768],f32>
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment