# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @anijain2305
# Last active October 8, 2022 00:38
# Show Gist options
#   • Save anijain2305/b713d3d04093ac78813e820d5e73fc31 to your computer and use it in GitHub Desktop.
import triton
import triton.language as tl
from torchinductor.ir import ReductionHint
from torchinductor.triton_ops.autotune import pointwise
from torchinductor.utils import instance_descriptor
@pointwise(size_hints=[4194304], filename=__file__, meta={'signature': {0: '*fp32', 1: '*i64', 2: '*fp32', 3: '*fp32', 4: '*fp32', 5: '*fp32', 6: '*fp32', 7: '*fp32', 8: '*fp32', 9: '*fp32', 10: '*fp32', 11: '*fp32', 12: '*fp32', 13: '*fp32', 14: '*fp32', 15: '*fp32', 16: '*fp32', 17: '*fp32', 18: '*fp32', 19: '*fp32', 20: '*fp32', 21: '*fp32', 22: '*fp32', 23: '*fp32', 24: '*fp32', 25: '*fp32', 26: '*fp32', 27: '*fp32', 28: '*fp32', 29: '*fp32', 30: '*fp32', 31: '*fp32', 32: '*fp32', 33: '*fp32', 34: '*fp32', 35: '*fp32', 36: '*fp32', 37: '*fp32', 38: '*fp32', 39: '*fp32', 40: '*fp32', 41: '*fp32', 42: '*fp32', 43: '*fp32', 44: '*fp32', 45: '*fp32', 46: '*fp32', 47: '*fp32', 48: '*fp32', 49: '*fp32', 50: '*fp32', 51: '*fp32', 52: '*fp32', 53: '*fp32', 54: '*fp32', 55: '*fp32', 56: '*fp32', 57: '*fp32', 58: '*fp32', 59: '*fp32', 60: '*fp32', 61: '*fp32', 62: '*fp32', 63: '*fp32', 64: '*fp32', 65: '*fp32', 66: '*fp32', 67: '*fp32', 68: '*fp32', 69: '*fp32', 70: '*fp32', 71: '*fp32', 72: '*fp32', 73: '*fp32', 74: '*fp32', 75: '*fp32', 76: '*fp32', 77: '*fp32', 78: '*fp32', 79: '*fp32', 80: '*fp32', 81: '*fp32', 82: '*fp32', 83: '*fp32', 84: '*fp32', 85: '*fp32', 86: '*fp32', 87: '*fp32', 88: '*fp32', 89: '*fp32', 90: '*fp32', 91: '*fp32', 92: '*fp32', 93: '*fp32', 94: '*fp32', 95: '*fp32', 96: '*fp32', 97: '*fp32', 98: '*fp32', 99: '*fp32', 100: '*fp32', 101: '*fp32', 102: '*fp32', 103: '*fp32', 104: '*fp32', 105: '*fp32', 106: '*fp32', 107: '*fp32', 108: '*fp32', 109: '*fp32', 110: '*fp32', 111: '*fp32', 112: '*fp32', 113: '*fp32', 114: '*fp32', 115: '*fp32', 116: '*fp32', 117: '*fp32', 118: '*fp32', 119: '*fp32', 120: '*fp32', 121: '*fp32', 122: '*fp32', 123: '*fp32', 124: '*fp32', 125: '*fp32', 126: '*fp32', 127: '*fp32', 128: '*fp32', 129: '*fp32', 130: '*fp32', 131: '*fp32', 132: '*fp32', 133: '*fp32', 134: '*fp32', 135: '*fp32', 136: '*fp32', 137: '*fp32', 138: '*fp32', 139: '*fp32', 140: '*fp32', 141: '*fp32', 142: '*fp32', 143: 'i32'}, 'device': 0, 'configs': 
[instance_descriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142), equal_to_1=())], 'constants': {}})
@triton.jit
def kernel(in_ptr0, in_ptr1, in_ptr2, in_ptr3, in_ptr4, in_ptr5, in_ptr6, in_ptr7, in_ptr8, in_ptr9, in_ptr10, in_ptr11, in_ptr12, in_ptr13, in_ptr14, in_ptr15, in_ptr16, in_ptr17, in_ptr18, in_ptr19, in_ptr20, in_ptr21, in_ptr22, in_ptr23, in_ptr24, in_ptr25, in_ptr26, in_ptr27, in_ptr28, in_ptr29, in_ptr30, in_ptr31, in_ptr32, in_ptr33, in_ptr34, in_ptr35, in_ptr36, in_ptr37, in_ptr38, in_ptr39, in_ptr40, in_ptr41, in_ptr42, in_ptr43, in_ptr44, in_ptr45, in_ptr46, in_ptr47, in_ptr48, in_ptr49, in_ptr50, in_ptr51, in_ptr52, in_ptr53, in_ptr54, in_ptr55, in_ptr56, in_ptr57, in_ptr58, in_ptr59, in_ptr60, in_ptr61, in_ptr62, in_ptr63, in_ptr64, in_ptr65, in_ptr66, in_ptr67, in_ptr68, in_ptr69, in_ptr70, in_ptr71, in_ptr72, in_ptr73, in_ptr74, in_ptr75, in_ptr76, in_ptr77, in_ptr78, in_ptr79, in_ptr80, in_ptr81, in_ptr82, in_ptr83, in_ptr84, in_ptr85, in_ptr86, in_ptr87, in_ptr88, in_ptr89, in_ptr90, in_ptr91, in_ptr92, in_ptr93, in_ptr94, in_ptr95, in_ptr96, in_ptr97, in_ptr98, in_ptr99, in_ptr100, in_ptr101, in_ptr102, in_ptr103, in_ptr104, in_ptr105, in_ptr106, in_ptr107, in_ptr108, in_ptr109, in_ptr110, in_ptr111, in_ptr112, in_ptr113, in_ptr114, in_ptr115, in_ptr116, in_ptr117, in_ptr118, in_ptr119, in_ptr120, in_ptr121, in_ptr122, in_ptr123, in_ptr124, in_ptr125, in_ptr126, in_ptr127, in_ptr128, in_ptr129, in_ptr130, in_ptr131, in_ptr132, in_ptr133, in_ptr134, in_ptr135, in_ptr136, in_ptr137, in_ptr138, in_ptr139, in_ptr140, out_ptr0, out_ptr1, xnumel, XBLOCK : tl.constexpr):
xnumel = 4194304
xoffset = tl.program_id(0) * XBLOCK
xindex = xoffset + tl.reshape(tl.arange(0, XBLOCK), [XBLOCK])
xmask = xindex < xnumel
x2 = xindex
x1 = (xindex // 512)
tmp0 = tl.load(in_ptr0 + (x2), xmask)
tmp1 = tl.load(in_ptr1 + (0 + tl.zeros([XBLOCK], tl.int32)), None)
tmp7 = tl.load(in_ptr2 + (x2), xmask)
tmp11 = tl.load(in_ptr3 + (x2), xmask)
tmp13 = tl.load(in_ptr4 + (x1), xmask)
tmp21 = tl.load(in_ptr5 + (x2), xmask)
tmp24 = tl.load(in_ptr6 + (x2), xmask)
tmp26 = tl.load(in_ptr7 + (x1), xmask)
tmp34 = tl.load(in_ptr8 + (x2), xmask)
tmp37 = tl.load(in_ptr9 + (x2), xmask)
tmp39 = tl.load(in_ptr10 + (x1), xmask)
tmp47 = tl.load(in_ptr11 + (x2), xmask)
tmp50 = tl.load(in_ptr12 + (x2), xmask)
tmp52 = tl.load(in_ptr13 + (x1), xmask)
tmp60 = tl.load(in_ptr14 + (x2), xmask)
tmp63 = tl.load(in_ptr15 + (x2), xmask)
tmp65 = tl.load(in_ptr16 + (x1), xmask)
tmp73 = tl.load(in_ptr17 + (x2), xmask)
tmp76 = tl.load(in_ptr18 + (x2), xmask)
tmp78 = tl.load(in_ptr19 + (x1), xmask)
tmp86 = tl.load(in_ptr20 + (x2), xmask)
tmp89 = tl.load(in_ptr21 + (x2), xmask)
tmp91 = tl.load(in_ptr22 + (x1), xmask)
tmp99 = tl.load(in_ptr23 + (x2), xmask)
tmp102 = tl.load(in_ptr24 + (x2), xmask)
tmp104 = tl.load(in_ptr25 + (x1), xmask)
tmp112 = tl.load(in_ptr26 + (x2), xmask)
tmp115 = tl.load(in_ptr27 + (x2), xmask)
tmp117 = tl.load(in_ptr28 + (x1), xmask)
tmp125 = tl.load(in_ptr29 + (x2), xmask)
tmp128 = tl.load(in_ptr30 + (x2), xmask)
tmp130 = tl.load(in_ptr31 + (x1), xmask)
tmp138 = tl.load(in_ptr32 + (x2), xmask)
tmp141 = tl.load(in_ptr33 + (x2), xmask)
tmp143 = tl.load(in_ptr34 + (x1), xmask)
tmp151 = tl.load(in_ptr35 + (x2), xmask)
tmp154 = tl.load(in_ptr36 + (x2), xmask)
tmp156 = tl.load(in_ptr37 + (x1), xmask)
tmp164 = tl.load(in_ptr38 + (x2), xmask)
tmp167 = tl.load(in_ptr39 + (x2), xmask)
tmp169 = tl.load(in_ptr40 + (x1), xmask)
tmp177 = tl.load(in_ptr41 + (x2), xmask)
tmp180 = tl.load(in_ptr42 + (x2), xmask)
tmp182 = tl.load(in_ptr43 + (x1), xmask)
tmp190 = tl.load(in_ptr44 + (x2), xmask)
tmp193 = tl.load(in_ptr45 + (x2), xmask)
tmp195 = tl.load(in_ptr46 + (x1), xmask)
tmp203 = tl.load(in_ptr47 + (x2), xmask)
tmp206 = tl.load(in_ptr48 + (x2), xmask)
tmp208 = tl.load(in_ptr49 + (x1), xmask)
tmp216 = tl.load(in_ptr50 + (x2), xmask)
tmp219 = tl.load(in_ptr51 + (x2), xmask)
tmp221 = tl.load(in_ptr52 + (x1), xmask)
tmp229 = tl.load(in_ptr53 + (x2), xmask)
tmp232 = tl.load(in_ptr54 + (x2), xmask)
tmp234 = tl.load(in_ptr55 + (x1), xmask)
tmp242 = tl.load(in_ptr56 + (x2), xmask)
tmp245 = tl.load(in_ptr57 + (x2), xmask)
tmp247 = tl.load(in_ptr58 + (x1), xmask)
tmp255 = tl.load(in_ptr59 + (x2), xmask)
tmp258 = tl.load(in_ptr60 + (x2), xmask)
tmp260 = tl.load(in_ptr61 + (x1), xmask)
tmp268 = tl.load(in_ptr62 + (x2), xmask)
tmp271 = tl.load(in_ptr63 + (x2), xmask)
tmp273 = tl.load(in_ptr64 + (x1), xmask)
tmp281 = tl.load(in_ptr65 + (x2), xmask)
tmp284 = tl.load(in_ptr66 + (x2), xmask)
tmp286 = tl.load(in_ptr67 + (x1), xmask)
tmp294 = tl.load(in_ptr68 + (x2), xmask)
tmp297 = tl.load(in_ptr69 + (x2), xmask)
tmp299 = tl.load(in_ptr70 + (x1), xmask)
tmp303 = tl.load(in_ptr0 + (4194304 + x2), xmask)
tmp308 = tl.load(in_ptr2 + (4194304 + x2), xmask)
tmp311 = tl.load(in_ptr3 + (4194304 + x2), xmask)
tmp313 = tl.load(in_ptr4 + (8192 + x1), xmask)
tmp321 = tl.load(in_ptr5 + (4194304 + x2), xmask)
tmp324 = tl.load(in_ptr6 + (4194304 + x2), xmask)
tmp326 = tl.load(in_ptr7 + (8192 + x1), xmask)
tmp334 = tl.load(in_ptr8 + (4194304 + x2), xmask)
tmp337 = tl.load(in_ptr9 + (4194304 + x2), xmask)
tmp339 = tl.load(in_ptr10 + (8192 + x1), xmask)
tmp347 = tl.load(in_ptr11 + (4194304 + x2), xmask)
tmp350 = tl.load(in_ptr12 + (4194304 + x2), xmask)
tmp352 = tl.load(in_ptr13 + (8192 + x1), xmask)
tmp360 = tl.load(in_ptr14 + (4194304 + x2), xmask)
tmp363 = tl.load(in_ptr15 + (4194304 + x2), xmask)
tmp365 = tl.load(in_ptr16 + (8192 + x1), xmask)
tmp373 = tl.load(in_ptr17 + (4194304 + x2), xmask)
tmp376 = tl.load(in_ptr18 + (4194304 + x2), xmask)
tmp378 = tl.load(in_ptr19 + (8192 + x1), xmask)
tmp386 = tl.load(in_ptr20 + (4194304 + x2), xmask)
tmp389 = tl.load(in_ptr21 + (4194304 + x2), xmask)
tmp391 = tl.load(in_ptr22 + (8192 + x1), xmask)
tmp399 = tl.load(in_ptr23 + (4194304 + x2), xmask)
tmp402 = tl.load(in_ptr24 + (4194304 + x2), xmask)
tmp404 = tl.load(in_ptr25 + (8192 + x1), xmask)
tmp412 = tl.load(in_ptr26 + (4194304 + x2), xmask)
tmp415 = tl.load(in_ptr27 + (4194304 + x2), xmask)
tmp417 = tl.load(in_ptr28 + (8192 + x1), xmask)
tmp425 = tl.load(in_ptr29 + (4194304 + x2), xmask)
tmp428 = tl.load(in_ptr30 + (4194304 + x2), xmask)
tmp430 = tl.load(in_ptr31 + (8192 + x1), xmask)
tmp438 = tl.load(in_ptr32 + (4194304 + x2), xmask)
tmp441 = tl.load(in_ptr33 + (4194304 + x2), xmask)
tmp443 = tl.load(in_ptr34 + (8192 + x1), xmask)
tmp451 = tl.load(in_ptr35 + (4194304 + x2), xmask)
tmp454 = tl.load(in_ptr36 + (4194304 + x2), xmask)
tmp456 = tl.load(in_ptr37 + (8192 + x1), xmask)
tmp464 = tl.load(in_ptr38 + (4194304 + x2), xmask)
tmp467 = tl.load(in_ptr39 + (4194304 + x2), xmask)
tmp469 = tl.load(in_ptr40 + (8192 + x1), xmask)
tmp477 = tl.load(in_ptr41 + (4194304 + x2), xmask)
tmp480 = tl.load(in_ptr42 + (4194304 + x2), xmask)
tmp482 = tl.load(in_ptr43 + (8192 + x1), xmask)
tmp490 = tl.load(in_ptr44 + (4194304 + x2), xmask)
tmp493 = tl.load(in_ptr45 + (4194304 + x2), xmask)
tmp495 = tl.load(in_ptr46 + (8192 + x1), xmask)
tmp503 = tl.load(in_ptr47 + (4194304 + x2), xmask)
tmp506 = tl.load(in_ptr48 + (4194304 + x2), xmask)
tmp508 = tl.load(in_ptr49 + (8192 + x1), xmask)
tmp516 = tl.load(in_ptr50 + (4194304 + x2), xmask)
tmp519 = tl.load(in_ptr51 + (4194304 + x2), xmask)
tmp521 = tl.load(in_ptr52 + (8192 + x1), xmask)
tmp529 = tl.load(in_ptr53 + (4194304 + x2), xmask)
tmp532 = tl.load(in_ptr54 + (4194304 + x2), xmask)
tmp534 = tl.load(in_ptr55 + (8192 + x1), xmask)
tmp542 = tl.load(in_ptr56 + (4194304 + x2), xmask)
tmp545 = tl.load(in_ptr57 + (4194304 + x2), xmask)
tmp547 = tl.load(in_ptr58 + (8192 + x1), xmask)
tmp555 = tl.load(in_ptr59 + (4194304 + x2), xmask)
tmp558 = tl.load(in_ptr60 + (4194304 + x2), xmask)
tmp560 = tl.load(in_ptr61 + (8192 + x1), xmask)
tmp568 = tl.load(in_ptr62 + (4194304 + x2), xmask)
tmp571 = tl.load(in_ptr63 + (4194304 + x2), xmask)
tmp573 = tl.load(in_ptr64 + (8192 + x1), xmask)
tmp581 = tl.load(in_ptr65 + (4194304 + x2), xmask)
tmp584 = tl.load(in_ptr66 + (4194304 + x2), xmask)
tmp586 = tl.load(in_ptr67 + (8192 + x1), xmask)
tmp594 = tl.load(in_ptr68 + (4194304 + x2), xmask)
tmp597 = tl.load(in_ptr69 + (4194304 + x2), xmask)
tmp599 = tl.load(in_ptr70 + (8192 + x1), xmask)
tmp604 = tl.load(in_ptr71 + (x2), xmask)
tmp609 = tl.load(in_ptr72 + (x2), xmask)
tmp612 = tl.load(in_ptr73 + (x2), xmask)
tmp614 = tl.load(in_ptr74 + (x1), xmask)
tmp622 = tl.load(in_ptr75 + (x2), xmask)
tmp625 = tl.load(in_ptr76 + (x2), xmask)
tmp627 = tl.load(in_ptr77 + (x1), xmask)
tmp635 = tl.load(in_ptr78 + (x2), xmask)
tmp638 = tl.load(in_ptr79 + (x2), xmask)
tmp640 = tl.load(in_ptr80 + (x1), xmask)
tmp648 = tl.load(in_ptr81 + (x2), xmask)
tmp651 = tl.load(in_ptr82 + (x2), xmask)
tmp653 = tl.load(in_ptr83 + (x1), xmask)
tmp661 = tl.load(in_ptr84 + (x2), xmask)
tmp664 = tl.load(in_ptr85 + (x2), xmask)
tmp666 = tl.load(in_ptr86 + (x1), xmask)
tmp674 = tl.load(in_ptr87 + (x2), xmask)
tmp677 = tl.load(in_ptr88 + (x2), xmask)
tmp679 = tl.load(in_ptr89 + (x1), xmask)
tmp687 = tl.load(in_ptr90 + (x2), xmask)
tmp690 = tl.load(in_ptr91 + (x2), xmask)
tmp692 = tl.load(in_ptr92 + (x1), xmask)
tmp700 = tl.load(in_ptr93 + (x2), xmask)
tmp703 = tl.load(in_ptr94 + (x2), xmask)
tmp705 = tl.load(in_ptr95 + (x1), xmask)
tmp713 = tl.load(in_ptr96 + (x2), xmask)
tmp716 = tl.load(in_ptr97 + (x2), xmask)
tmp718 = tl.load(in_ptr98 + (x1), xmask)
tmp726 = tl.load(in_ptr99 + (x2), xmask)
tmp729 = tl.load(in_ptr100 + (x2), xmask)
tmp731 = tl.load(in_ptr101 + (x1), xmask)
tmp739 = tl.load(in_ptr102 + (x2), xmask)
tmp742 = tl.load(in_ptr103 + (x2), xmask)
tmp744 = tl.load(in_ptr104 + (x1), xmask)
tmp752 = tl.load(in_ptr105 + (x2), xmask)
tmp755 = tl.load(in_ptr106 + (x2), xmask)
tmp757 = tl.load(in_ptr107 + (x1), xmask)
tmp765 = tl.load(in_ptr108 + (x2), xmask)
tmp768 = tl.load(in_ptr109 + (x2), xmask)
tmp770 = tl.load(in_ptr110 + (x1), xmask)
tmp778 = tl.load(in_ptr111 + (x2), xmask)
tmp781 = tl.load(in_ptr112 + (x2), xmask)
tmp783 = tl.load(in_ptr113 + (x1), xmask)
tmp791 = tl.load(in_ptr114 + (x2), xmask)
tmp794 = tl.load(in_ptr115 + (x2), xmask)
tmp796 = tl.load(in_ptr116 + (x1), xmask)
tmp804 = tl.load(in_ptr117 + (x2), xmask)
tmp807 = tl.load(in_ptr118 + (x2), xmask)
tmp809 = tl.load(in_ptr119 + (x1), xmask)
tmp817 = tl.load(in_ptr120 + (x2), xmask)
tmp820 = tl.load(in_ptr121 + (x2), xmask)
tmp822 = tl.load(in_ptr122 + (x1), xmask)
tmp830 = tl.load(in_ptr123 + (x2), xmask)
tmp833 = tl.load(in_ptr124 + (x2), xmask)
tmp835 = tl.load(in_ptr125 + (x1), xmask)
tmp843 = tl.load(in_ptr126 + (x2), xmask)
tmp846 = tl.load(in_ptr127 + (x2), xmask)
tmp848 = tl.load(in_ptr128 + (x1), xmask)
tmp856 = tl.load(in_ptr129 + (x2), xmask)
tmp859 = tl.load(in_ptr130 + (x2), xmask)
tmp861 = tl.load(in_ptr131 + (x1), xmask)
tmp869 = tl.load(in_ptr132 + (x2), xmask)
tmp872 = tl.load(in_ptr133 + (x2), xmask)
tmp874 = tl.load(in_ptr134 + (x1), xmask)
tmp882 = tl.load(in_ptr135 + (x2), xmask)
tmp885 = tl.load(in_ptr136 + (x2), xmask)
tmp887 = tl.load(in_ptr137 + (x1), xmask)
tmp895 = tl.load(in_ptr138 + (x2), xmask)
tmp898 = tl.load(in_ptr139 + (x2), xmask)
tmp900 = tl.load(in_ptr140 + (x1), xmask)
tmp904 = tl.load(in_ptr71 + (4194304 + x2), xmask)
tmp909 = tl.load(in_ptr72 + (4194304 + x2), xmask)
tmp912 = tl.load(in_ptr73 + (4194304 + x2), xmask)
tmp914 = tl.load(in_ptr74 + (8192 + x1), xmask)
tmp922 = tl.load(in_ptr75 + (4194304 + x2), xmask)
tmp925 = tl.load(in_ptr76 + (4194304 + x2), xmask)
tmp927 = tl.load(in_ptr77 + (8192 + x1), xmask)
tmp935 = tl.load(in_ptr78 + (4194304 + x2), xmask)
tmp938 = tl.load(in_ptr79 + (4194304 + x2), xmask)
tmp940 = tl.load(in_ptr80 + (8192 + x1), xmask)
tmp948 = tl.load(in_ptr81 + (4194304 + x2), xmask)
tmp951 = tl.load(in_ptr82 + (4194304 + x2), xmask)
tmp953 = tl.load(in_ptr83 + (8192 + x1), xmask)
tmp961 = tl.load(in_ptr84 + (4194304 + x2), xmask)
tmp964 = tl.load(in_ptr85 + (4194304 + x2), xmask)
tmp966 = tl.load(in_ptr86 + (8192 + x1), xmask)
tmp974 = tl.load(in_ptr87 + (4194304 + x2), xmask)
tmp977 = tl.load(in_ptr88 + (4194304 + x2), xmask)
tmp979 = tl.load(in_ptr89 + (8192 + x1), xmask)
tmp987 = tl.load(in_ptr90 + (4194304 + x2), xmask)
tmp990 = tl.load(in_ptr91 + (4194304 + x2), xmask)
tmp992 = tl.load(in_ptr92 + (8192 + x1), xmask)
tmp1000 = tl.load(in_ptr93 + (4194304 + x2), xmask)
tmp1003 = tl.load(in_ptr94 + (4194304 + x2), xmask)
tmp1005 = tl.load(in_ptr95 + (8192 + x1), xmask)
tmp1013 = tl.load(in_ptr96 + (4194304 + x2), xmask)
tmp1016 = tl.load(in_ptr97 + (4194304 + x2), xmask)
tmp1018 = tl.load(in_ptr98 + (8192 + x1), xmask)
tmp1026 = tl.load(in_ptr99 + (4194304 + x2), xmask)
tmp1029 = tl.load(in_ptr100 + (4194304 + x2), xmask)
tmp1031 = tl.load(in_ptr101 + (8192 + x1), xmask)
tmp1039 = tl.load(in_ptr102 + (4194304 + x2), xmask)
tmp1042 = tl.load(in_ptr103 + (4194304 + x2), xmask)
tmp1044 = tl.load(in_ptr104 + (8192 + x1), xmask)
tmp1052 = tl.load(in_ptr105 + (4194304 + x2), xmask)
tmp1055 = tl.load(in_ptr106 + (4194304 + x2), xmask)
tmp1057 = tl.load(in_ptr107 + (8192 + x1), xmask)
tmp1065 = tl.load(in_ptr108 + (4194304 + x2), xmask)
tmp1068 = tl.load(in_ptr109 + (4194304 + x2), xmask)
tmp1070 = tl.load(in_ptr110 + (8192 + x1), xmask)
tmp1078 = tl.load(in_ptr111 + (4194304 + x2), xmask)
tmp1081 = tl.load(in_ptr112 + (4194304 + x2), xmask)
tmp1083 = tl.load(in_ptr113 + (8192 + x1), xmask)
tmp1091 = tl.load(in_ptr114 + (4194304 + x2), xmask)
tmp1094 = tl.load(in_ptr115 + (4194304 + x2), xmask)
tmp1096 = tl.load(in_ptr116 + (8192 + x1), xmask)
tmp1104 = tl.load(in_ptr117 + (4194304 + x2), xmask)
tmp1107 = tl.load(in_ptr118 + (4194304 + x2), xmask)
tmp1109 = tl.load(in_ptr119 + (8192 + x1), xmask)
tmp1117 = tl.load(in_ptr120 + (4194304 + x2), xmask)
tmp1120 = tl.load(in_ptr121 + (4194304 + x2), xmask)
tmp1122 = tl.load(in_ptr122 + (8192 + x1), xmask)
tmp1130 = tl.load(in_ptr123 + (4194304 + x2), xmask)
tmp1133 = tl.load(in_ptr124 + (4194304 + x2), xmask)
tmp1135 = tl.load(in_ptr125 + (8192 + x1), xmask)
tmp1143 = tl.load(in_ptr126 + (4194304 + x2), xmask)
tmp1146 = tl.load(in_ptr127 + (4194304 + x2), xmask)
tmp1148 = tl.load(in_ptr128 + (8192 + x1), xmask)
tmp1156 = tl.load(in_ptr129 + (4194304 + x2), xmask)
tmp1159 = tl.load(in_ptr130 + (4194304 + x2), xmask)
tmp1161 = tl.load(in_ptr131 + (8192 + x1), xmask)
tmp1169 = tl.load(in_ptr132 + (4194304 + x2), xmask)
tmp1172 = tl.load(in_ptr133 + (4194304 + x2), xmask)
tmp1174 = tl.load(in_ptr134 + (8192 + x1), xmask)
tmp1182 = tl.load(in_ptr135 + (4194304 + x2), xmask)
tmp1185 = tl.load(in_ptr136 + (4194304 + x2), xmask)
tmp1187 = tl.load(in_ptr137 + (8192 + x1), xmask)
tmp1195 = tl.load(in_ptr138 + (4194304 + x2), xmask)
tmp1198 = tl.load(in_ptr139 + (4194304 + x2), xmask)
tmp1200 = tl.load(in_ptr140 + (8192 + x1), xmask)
tmp2 = 570425344 + x2
tmp3 = tl.rand(tmp1, tmp2)
tmp4 = 0.1
tmp5 = tmp3 > tmp4
tmp6 = tmp5.to(tl.float32)
tmp8 = tmp6 * tmp7
tmp9 = 1.1111111111111112
tmp10 = tmp8 * tmp9
tmp12 = tmp10 * tmp11
tmp14 = tmp11 * tmp13
tmp15 = tmp12 - tmp14
tmp16 = tmp0 + tmp15
tmp17 = 553648128 + x2
tmp18 = tl.rand(tmp1, tmp17)
tmp19 = tmp18 > tmp4
tmp20 = tmp19.to(tl.float32)
tmp22 = tmp20 * tmp21
tmp23 = tmp22 * tmp9
tmp25 = tmp23 * tmp24
tmp27 = tmp24 * tmp26
tmp28 = tmp25 - tmp27
tmp29 = tmp16 + tmp28
tmp30 = 536870912 + x2
tmp31 = tl.rand(tmp1, tmp30)
tmp32 = tmp31 > tmp4
tmp33 = tmp32.to(tl.float32)
tmp35 = tmp33 * tmp34
tmp36 = tmp35 * tmp9
tmp38 = tmp36 * tmp37
tmp40 = tmp37 * tmp39
tmp41 = tmp38 - tmp40
tmp42 = tmp29 + tmp41
tmp43 = 520093696 + x2
tmp44 = tl.rand(tmp1, tmp43)
tmp45 = tmp44 > tmp4
tmp46 = tmp45.to(tl.float32)
tmp48 = tmp46 * tmp47
tmp49 = tmp48 * tmp9
tmp51 = tmp49 * tmp50
tmp53 = tmp50 * tmp52
tmp54 = tmp51 - tmp53
tmp55 = tmp42 + tmp54
tmp56 = 503316480 + x2
tmp57 = tl.rand(tmp1, tmp56)
tmp58 = tmp57 > tmp4
tmp59 = tmp58.to(tl.float32)
tmp61 = tmp59 * tmp60
tmp62 = tmp61 * tmp9
tmp64 = tmp62 * tmp63
tmp66 = tmp63 * tmp65
tmp67 = tmp64 - tmp66
tmp68 = tmp55 + tmp67
tmp69 = 486539264 + x2
tmp70 = tl.rand(tmp1, tmp69)
tmp71 = tmp70 > tmp4
tmp72 = tmp71.to(tl.float32)
tmp74 = tmp72 * tmp73
tmp75 = tmp74 * tmp9
tmp77 = tmp75 * tmp76
tmp79 = tmp76 * tmp78
tmp80 = tmp77 - tmp79
tmp81 = tmp68 + tmp80
tmp82 = 469762048 + x2
tmp83 = tl.rand(tmp1, tmp82)
tmp84 = tmp83 > tmp4
tmp85 = tmp84.to(tl.float32)
tmp87 = tmp85 * tmp86
tmp88 = tmp87 * tmp9
tmp90 = tmp88 * tmp89
tmp92 = tmp89 * tmp91
tmp93 = tmp90 - tmp92
tmp94 = tmp81 + tmp93
tmp95 = 452984832 + x2
tmp96 = tl.rand(tmp1, tmp95)
tmp97 = tmp96 > tmp4
tmp98 = tmp97.to(tl.float32)
tmp100 = tmp98 * tmp99
tmp101 = tmp100 * tmp9
tmp103 = tmp101 * tmp102
tmp105 = tmp102 * tmp104
tmp106 = tmp103 - tmp105
tmp107 = tmp94 + tmp106
tmp108 = 436207616 + x2
tmp109 = tl.rand(tmp1, tmp108)
tmp110 = tmp109 > tmp4
tmp111 = tmp110.to(tl.float32)
tmp113 = tmp111 * tmp112
tmp114 = tmp113 * tmp9
tmp116 = tmp114 * tmp115
tmp118 = tmp115 * tmp117
tmp119 = tmp116 - tmp118
tmp120 = tmp107 + tmp119
tmp121 = 419430400 + x2
tmp122 = tl.rand(tmp1, tmp121)
tmp123 = tmp122 > tmp4
tmp124 = tmp123.to(tl.float32)
tmp126 = tmp124 * tmp125
tmp127 = tmp126 * tmp9
tmp129 = tmp127 * tmp128
tmp131 = tmp128 * tmp130
tmp132 = tmp129 - tmp131
tmp133 = tmp120 + tmp132
tmp134 = 402653184 + x2
tmp135 = tl.rand(tmp1, tmp134)
tmp136 = tmp135 > tmp4
tmp137 = tmp136.to(tl.float32)
tmp139 = tmp137 * tmp138
tmp140 = tmp139 * tmp9
tmp142 = tmp140 * tmp141
tmp144 = tmp141 * tmp143
tmp145 = tmp142 - tmp144
tmp146 = tmp133 + tmp145
tmp147 = 385875968 + x2
tmp148 = tl.rand(tmp1, tmp147)
tmp149 = tmp148 > tmp4
tmp150 = tmp149.to(tl.float32)
tmp152 = tmp150 * tmp151
tmp153 = tmp152 * tmp9
tmp155 = tmp153 * tmp154
tmp157 = tmp154 * tmp156
tmp158 = tmp155 - tmp157
tmp159 = tmp146 + tmp158
tmp160 = 369098752 + x2
tmp161 = tl.rand(tmp1, tmp160)
tmp162 = tmp161 > tmp4
tmp163 = tmp162.to(tl.float32)
tmp165 = tmp163 * tmp164
tmp166 = tmp165 * tmp9
tmp168 = tmp166 * tmp167
tmp170 = tmp167 * tmp169
tmp171 = tmp168 - tmp170
tmp172 = tmp159 + tmp171
tmp173 = 352321536 + x2
tmp174 = tl.rand(tmp1, tmp173)
tmp175 = tmp174 > tmp4
tmp176 = tmp175.to(tl.float32)
tmp178 = tmp176 * tmp177
tmp179 = tmp178 * tmp9
tmp181 = tmp179 * tmp180
tmp183 = tmp180 * tmp182
tmp184 = tmp181 - tmp183
tmp185 = tmp172 + tmp184
tmp186 = 335544320 + x2
tmp187 = tl.rand(tmp1, tmp186)
tmp188 = tmp187 > tmp4
tmp189 = tmp188.to(tl.float32)
tmp191 = tmp189 * tmp190
tmp192 = tmp191 * tmp9
tmp194 = tmp192 * tmp193
tmp196 = tmp193 * tmp195
tmp197 = tmp194 - tmp196
tmp198 = tmp185 + tmp197
tmp199 = 318767104 + x2
tmp200 = tl.rand(tmp1, tmp199)
tmp201 = tmp200 > tmp4
tmp202 = tmp201.to(tl.float32)
tmp204 = tmp202 * tmp203
tmp205 = tmp204 * tmp9
tmp207 = tmp205 * tmp206
tmp209 = tmp206 * tmp208
tmp210 = tmp207 - tmp209
tmp211 = tmp198 + tmp210
tmp212 = 301989888 + x2
tmp213 = tl.rand(tmp1, tmp212)
tmp214 = tmp213 > tmp4
tmp215 = tmp214.to(tl.float32)
tmp217 = tmp215 * tmp216
tmp218 = tmp217 * tmp9
tmp220 = tmp218 * tmp219
tmp222 = tmp219 * tmp221
tmp223 = tmp220 - tmp222
tmp224 = tmp211 + tmp223
tmp225 = 285212672 + x2
tmp226 = tl.rand(tmp1, tmp225)
tmp227 = tmp226 > tmp4
tmp228 = tmp227.to(tl.float32)
tmp230 = tmp228 * tmp229
tmp231 = tmp230 * tmp9
tmp233 = tmp231 * tmp232
tmp235 = tmp232 * tmp234
tmp236 = tmp233 - tmp235
tmp237 = tmp224 + tmp236
tmp238 = 268435456 + x2
tmp239 = tl.rand(tmp1, tmp238)
tmp240 = tmp239 > tmp4
tmp241 = tmp240.to(tl.float32)
tmp243 = tmp241 * tmp242
tmp244 = tmp243 * tmp9
tmp246 = tmp244 * tmp245
tmp248 = tmp245 * tmp247
tmp249 = tmp246 - tmp248
tmp250 = tmp237 + tmp249
tmp251 = 251658240 + x2
tmp252 = tl.rand(tmp1, tmp251)
tmp253 = tmp252 > tmp4
tmp254 = tmp253.to(tl.float32)
tmp256 = tmp254 * tmp255
tmp257 = tmp256 * tmp9
tmp259 = tmp257 * tmp258
tmp261 = tmp258 * tmp260
tmp262 = tmp259 - tmp261
tmp263 = tmp250 + tmp262
tmp264 = 234881024 + x2
tmp265 = tl.rand(tmp1, tmp264)
tmp266 = tmp265 > tmp4
tmp267 = tmp266.to(tl.float32)
tmp269 = tmp267 * tmp268
tmp270 = tmp269 * tmp9
tmp272 = tmp270 * tmp271
tmp274 = tmp271 * tmp273
tmp275 = tmp272 - tmp274
tmp276 = tmp263 + tmp275
tmp277 = 218103808 + x2
tmp278 = tl.rand(tmp1, tmp277)
tmp279 = tmp278 > tmp4
tmp280 = tmp279.to(tl.float32)
tmp282 = tmp280 * tmp281
tmp283 = tmp282 * tmp9
tmp285 = tmp283 * tmp284
tmp287 = tmp284 * tmp286
tmp288 = tmp285 - tmp287
tmp289 = tmp276 + tmp288
tmp290 = 201326592 + x2
tmp291 = tl.rand(tmp1, tmp290)
tmp292 = tmp291 > tmp4
tmp293 = tmp292.to(tl.float32)
tmp295 = tmp293 * tmp294
tmp296 = tmp295 * tmp9
tmp298 = tmp296 * tmp297
tmp300 = tmp297 * tmp299
tmp301 = tmp298 - tmp300
tmp302 = tmp289 + tmp301
tmp304 = 574619648 + x2
tmp305 = tl.rand(tmp1, tmp304)
tmp306 = tmp305 > tmp4
tmp307 = tmp306.to(tl.float32)
tmp309 = tmp307 * tmp308
tmp310 = tmp309 * tmp9
tmp312 = tmp310 * tmp311
tmp314 = tmp311 * tmp313
tmp315 = tmp312 - tmp314
tmp316 = tmp303 + tmp315
tmp317 = 557842432 + x2
tmp318 = tl.rand(tmp1, tmp317)
tmp319 = tmp318 > tmp4
tmp320 = tmp319.to(tl.float32)
tmp322 = tmp320 * tmp321
tmp323 = tmp322 * tmp9
tmp325 = tmp323 * tmp324
tmp327 = tmp324 * tmp326
tmp328 = tmp325 - tmp327
tmp329 = tmp316 + tmp328
tmp330 = 541065216 + x2
tmp331 = tl.rand(tmp1, tmp330)
tmp332 = tmp331 > tmp4
tmp333 = tmp332.to(tl.float32)
tmp335 = tmp333 * tmp334
tmp336 = tmp335 * tmp9
tmp338 = tmp336 * tmp337
tmp340 = tmp337 * tmp339
tmp341 = tmp338 - tmp340
tmp342 = tmp329 + tmp341
tmp343 = 524288000 + x2
tmp344 = tl.rand(tmp1, tmp343)
tmp345 = tmp344 > tmp4
tmp346 = tmp345.to(tl.float32)
tmp348 = tmp346 * tmp347
tmp349 = tmp348 * tmp9
tmp351 = tmp349 * tmp350
tmp353 = tmp350 * tmp352
tmp354 = tmp351 - tmp353
tmp355 = tmp342 + tmp354
tmp356 = 507510784 + x2
tmp357 = tl.rand(tmp1, tmp356)
tmp358 = tmp357 > tmp4
tmp359 = tmp358.to(tl.float32)
tmp361 = tmp359 * tmp360
tmp362 = tmp361 * tmp9
tmp364 = tmp362 * tmp363
tmp366 = tmp363 * tmp365
tmp367 = tmp364 - tmp366
tmp368 = tmp355 + tmp367
tmp369 = 490733568 + x2
tmp370 = tl.rand(tmp1, tmp369)
tmp371 = tmp370 > tmp4
tmp372 = tmp371.to(tl.float32)
tmp374 = tmp372 * tmp373
tmp375 = tmp374 * tmp9
tmp377 = tmp375 * tmp376
tmp379 = tmp376 * tmp378
tmp380 = tmp377 - tmp379
tmp381 = tmp368 + tmp380
tmp382 = 473956352 + x2
tmp383 = tl.rand(tmp1, tmp382)
tmp384 = tmp383 > tmp4
tmp385 = tmp384.to(tl.float32)
tmp387 = tmp385 * tmp386
tmp388 = tmp387 * tmp9
tmp390 = tmp388 * tmp389
tmp392 = tmp389 * tmp391
tmp393 = tmp390 - tmp392
tmp394 = tmp381 + tmp393
tmp395 = 457179136 + x2
tmp396 = tl.rand(tmp1, tmp395)
tmp397 = tmp396 > tmp4
tmp398 = tmp397.to(tl.float32)
tmp400 = tmp398 * tmp399
tmp401 = tmp400 * tmp9
tmp403 = tmp401 * tmp402
tmp405 = tmp402 * tmp404
tmp406 = tmp403 - tmp405
tmp407 = tmp394 + tmp406
tmp408 = 440401920 + x2
tmp409 = tl.rand(tmp1, tmp408)
tmp410 = tmp409 > tmp4
tmp411 = tmp410.to(tl.float32)
tmp413 = tmp411 * tmp412
tmp414 = tmp413 * tmp9
tmp416 = tmp414 * tmp415
tmp418 = tmp415 * tmp417
tmp419 = tmp416 - tmp418
tmp420 = tmp407 + tmp419
tmp421 = 423624704 + x2
tmp422 = tl.rand(tmp1, tmp421)
tmp423 = tmp422 > tmp4
tmp424 = tmp423.to(tl.float32)
tmp426 = tmp424 * tmp425
tmp427 = tmp426 * tmp9
tmp429 = tmp427 * tmp428
tmp431 = tmp428 * tmp430
tmp432 = tmp429 - tmp431
tmp433 = tmp420 + tmp432
tmp434 = 406847488 + x2
tmp435 = tl.rand(tmp1, tmp434)
tmp436 = tmp435 > tmp4
tmp437 = tmp436.to(tl.float32)
tmp439 = tmp437 * tmp438
tmp440 = tmp439 * tmp9
tmp442 = tmp440 * tmp441
tmp444 = tmp441 * tmp443
tmp445 = tmp442 - tmp444
tmp446 = tmp433 + tmp445
tmp447 = 390070272 + x2
tmp448 = tl.rand(tmp1, tmp447)
tmp449 = tmp448 > tmp4
tmp450 = tmp449.to(tl.float32)
tmp452 = tmp450 * tmp451
tmp453 = tmp452 * tmp9
tmp455 = tmp453 * tmp454
tmp457 = tmp454 * tmp456
tmp458 = tmp455 - tmp457
tmp459 = tmp446 + tmp458
tmp460 = 373293056 + x2
tmp461 = tl.rand(tmp1, tmp460)
tmp462 = tmp461 > tmp4
tmp463 = tmp462.to(tl.float32)
tmp465 = tmp463 * tmp464
tmp466 = tmp465 * tmp9
tmp468 = tmp466 * tmp467
tmp470 = tmp467 * tmp469
tmp471 = tmp468 - tmp470
tmp472 = tmp459 + tmp471
tmp473 = 356515840 + x2
tmp474 = tl.rand(tmp1, tmp473)
tmp475 = tmp474 > tmp4
tmp476 = tmp475.to(tl.float32)
tmp478 = tmp476 * tmp477
tmp479 = tmp478 * tmp9
tmp481 = tmp479 * tmp480
tmp483 = tmp480 * tmp482
tmp484 = tmp481 - tmp483
tmp485 = tmp472 + tmp484
tmp486 = 339738624 + x2
tmp487 = tl.rand(tmp1, tmp486)
tmp488 = tmp487 > tmp4
tmp489 = tmp488.to(tl.float32)
tmp491 = tmp489 * tmp490
tmp492 = tmp491 * tmp9
tmp494 = tmp492 * tmp493
tmp496 = tmp493 * tmp495
tmp497 = tmp494 - tmp496
tmp498 = tmp485 + tmp497
tmp499 = 322961408 + x2
tmp500 = tl.rand(tmp1, tmp499)
tmp501 = tmp500 > tmp4
tmp502 = tmp501.to(tl.float32)
tmp504 = tmp502 * tmp503
tmp505 = tmp504 * tmp9
tmp507 = tmp505 * tmp506
tmp509 = tmp506 * tmp508
tmp510 = tmp507 - tmp509
tmp511 = tmp498 + tmp510
tmp512 = 306184192 + x2
tmp513 = tl.rand(tmp1, tmp512)
tmp514 = tmp513 > tmp4
tmp515 = tmp514.to(tl.float32)
tmp517 = tmp515 * tmp516
tmp518 = tmp517 * tmp9
tmp520 = tmp518 * tmp519
tmp522 = tmp519 * tmp521
tmp523 = tmp520 - tmp522
tmp524 = tmp511 + tmp523
tmp525 = 289406976 + x2
tmp526 = tl.rand(tmp1, tmp525)
tmp527 = tmp526 > tmp4
tmp528 = tmp527.to(tl.float32)
tmp530 = tmp528 * tmp529
tmp531 = tmp530 * tmp9
tmp533 = tmp531 * tmp532
tmp535 = tmp532 * tmp534
tmp536 = tmp533 - tmp535
tmp537 = tmp524 + tmp536
tmp538 = 272629760 + x2
tmp539 = tl.rand(tmp1, tmp538)
tmp540 = tmp539 > tmp4
tmp541 = tmp540.to(tl.float32)
tmp543 = tmp541 * tmp542
tmp544 = tmp543 * tmp9
tmp546 = tmp544 * tmp545
tmp548 = tmp545 * tmp547
tmp549 = tmp546 - tmp548
tmp550 = tmp537 + tmp549
tmp551 = 255852544 + x2
tmp552 = tl.rand(tmp1, tmp551)
tmp553 = tmp552 > tmp4
tmp554 = tmp553.to(tl.float32)
tmp556 = tmp554 * tmp555
tmp557 = tmp556 * tmp9
tmp559 = tmp557 * tmp558
tmp561 = tmp558 * tmp560
tmp562 = tmp559 - tmp561
tmp563 = tmp550 + tmp562
tmp564 = 239075328 + x2
tmp565 = tl.rand(tmp1, tmp564)
tmp566 = tmp565 > tmp4
tmp567 = tmp566.to(tl.float32)
tmp569 = tmp567 * tmp568
tmp570 = tmp569 * tmp9
tmp572 = tmp570 * tmp571
tmp574 = tmp571 * tmp573
tmp575 = tmp572 - tmp574
tmp576 = tmp563 + tmp575
tmp577 = 222298112 + x2
tmp578 = tl.rand(tmp1, tmp577)
tmp579 = tmp578 > tmp4
tmp580 = tmp579.to(tl.float32)
tmp582 = tmp580 * tmp581
tmp583 = tmp582 * tmp9
tmp585 = tmp583 * tmp584
tmp587 = tmp584 * tmp586
tmp588 = tmp585 - tmp587
tmp589 = tmp576 + tmp588
tmp590 = 205520896 + x2
tmp591 = tl.rand(tmp1, tmp590)
tmp592 = tmp591 > tmp4
tmp593 = tmp592.to(tl.float32)
tmp595 = tmp593 * tmp594
tmp596 = tmp595 * tmp9
tmp598 = tmp596 * tmp597
tmp600 = tmp597 * tmp599
tmp601 = tmp598 - tmp600
tmp602 = tmp589 + tmp601
tmp603 = tmp302 + tmp602
tmp605 = 184549376 + x2
tmp606 = tl.rand(tmp1, tmp605)
tmp607 = tmp606 > tmp4
tmp608 = tmp607.to(tl.float32)
tmp610 = tmp608 * tmp609
tmp611 = tmp610 * tmp9
tmp613 = tmp611 * tmp612
tmp615 = tmp612 * tmp614
tmp616 = tmp613 - tmp615
tmp617 = tmp604 + tmp616
tmp618 = 176160768 + x2
tmp619 = tl.rand(tmp1, tmp618)
tmp620 = tmp619 > tmp4
tmp621 = tmp620.to(tl.float32)
tmp623 = tmp621 * tmp622
tmp624 = tmp623 * tmp9
tmp626 = tmp624 * tmp625
tmp628 = tmp625 * tmp627
tmp629 = tmp626 - tmp628
tmp630 = tmp617 + tmp629
tmp631 = 167772160 + x2
tmp632 = tl.rand(tmp1, tmp631)
tmp633 = tmp632 > tmp4
tmp634 = tmp633.to(tl.float32)
tmp636 = tmp634 * tmp635
tmp637 = tmp636 * tmp9
tmp639 = tmp637 * tmp638
tmp641 = tmp638 * tmp640
tmp642 = tmp639 - tmp641
tmp643 = tmp630 + tmp642
tmp644 = 159383552 + x2
tmp645 = tl.rand(tmp1, tmp644)
tmp646 = tmp645 > tmp4
tmp647 = tmp646.to(tl.float32)
tmp649 = tmp647 * tmp648
tmp650 = tmp649 * tmp9
tmp652 = tmp650 * tmp651
tmp654 = tmp651 * tmp653
tmp655 = tmp652 - tmp654
tmp656 = tmp643 + tmp655
tmp657 = 150994944 + x2
tmp658 = tl.rand(tmp1, tmp657)
tmp659 = tmp658 > tmp4
tmp660 = tmp659.to(tl.float32)
tmp662 = tmp660 * tmp661
tmp663 = tmp662 * tmp9
tmp665 = tmp663 * tmp664
tmp667 = tmp664 * tmp666
tmp668 = tmp665 - tmp667
tmp669 = tmp656 + tmp668
tmp670 = 142606336 + x2
tmp671 = tl.rand(tmp1, tmp670)
tmp672 = tmp671 > tmp4
tmp673 = tmp672.to(tl.float32)
tmp675 = tmp673 * tmp674
tmp676 = tmp675 * tmp9
tmp678 = tmp676 * tmp677
tmp680 = tmp677 * tmp679
tmp681 = tmp678 - tmp680
tmp682 = tmp669 + tmp681
tmp683 = 134217728 + x2
tmp684 = tl.rand(tmp1, tmp683)
tmp685 = tmp684 > tmp4
tmp686 = tmp685.to(tl.float32)
tmp688 = tmp686 * tmp687
tmp689 = tmp688 * tmp9
tmp691 = tmp689 * tmp690
tmp693 = tmp690 * tmp692
tmp694 = tmp691 - tmp693
tmp695 = tmp682 + tmp694
tmp696 = 125829120 + x2
tmp697 = tl.rand(tmp1, tmp696)
tmp698 = tmp697 > tmp4
tmp699 = tmp698.to(tl.float32)
tmp701 = tmp699 * tmp700
tmp702 = tmp701 * tmp9
tmp704 = tmp702 * tmp703
tmp706 = tmp703 * tmp705
tmp707 = tmp704 - tmp706
tmp708 = tmp695 + tmp707
tmp709 = 117440512 + x2
tmp710 = tl.rand(tmp1, tmp709)
tmp711 = tmp710 > tmp4
tmp712 = tmp711.to(tl.float32)
tmp714 = tmp712 * tmp713
tmp715 = tmp714 * tmp9
tmp717 = tmp715 * tmp716
tmp719 = tmp716 * tmp718
tmp720 = tmp717 - tmp719
tmp721 = tmp708 + tmp720
tmp722 = 109051904 + x2
tmp723 = tl.rand(tmp1, tmp722)
tmp724 = tmp723 > tmp4
tmp725 = tmp724.to(tl.float32)
tmp727 = tmp725 * tmp726
tmp728 = tmp727 * tmp9
tmp730 = tmp728 * tmp729
tmp732 = tmp729 * tmp731
tmp733 = tmp730 - tmp732
tmp734 = tmp721 + tmp733
tmp735 = 100663296 + x2
tmp736 = tl.rand(tmp1, tmp735)
tmp737 = tmp736 > tmp4
tmp738 = tmp737.to(tl.float32)
tmp740 = tmp738 * tmp739
tmp741 = tmp740 * tmp9
tmp743 = tmp741 * tmp742
tmp745 = tmp742 * tmp744
tmp746 = tmp743 - tmp745
tmp747 = tmp734 + tmp746
tmp748 = 92274688 + x2
tmp749 = tl.rand(tmp1, tmp748)
tmp750 = tmp749 > tmp4
tmp751 = tmp750.to(tl.float32)
tmp753 = tmp751 * tmp752
tmp754 = tmp753 * tmp9
tmp756 = tmp754 * tmp755
tmp758 = tmp755 * tmp757
tmp759 = tmp756 - tmp758
tmp760 = tmp747 + tmp759
tmp761 = 83886080 + x2
tmp762 = tl.rand(tmp1, tmp761)
tmp763 = tmp762 > tmp4
tmp764 = tmp763.to(tl.float32)
tmp766 = tmp764 * tmp765
tmp767 = tmp766 * tmp9
tmp769 = tmp767 * tmp768
tmp771 = tmp768 * tmp770
tmp772 = tmp769 - tmp771
tmp773 = tmp760 + tmp772
tmp774 = 75497472 + x2
tmp775 = tl.rand(tmp1, tmp774)
tmp776 = tmp775 > tmp4
tmp777 = tmp776.to(tl.float32)
tmp779 = tmp777 * tmp778
tmp780 = tmp779 * tmp9
tmp782 = tmp780 * tmp781
tmp784 = tmp781 * tmp783
tmp785 = tmp782 - tmp784
tmp786 = tmp773 + tmp785
tmp787 = 67108864 + x2
tmp788 = tl.rand(tmp1, tmp787)
tmp789 = tmp788 > tmp4
tmp790 = tmp789.to(tl.float32)
tmp792 = tmp790 * tmp791
tmp793 = tmp792 * tmp9
tmp795 = tmp793 * tmp794
tmp797 = tmp794 * tmp796
tmp798 = tmp795 - tmp797
tmp799 = tmp786 + tmp798
tmp800 = 58720256 + x2
tmp801 = tl.rand(tmp1, tmp800)
tmp802 = tmp801 > tmp4
tmp803 = tmp802.to(tl.float32)
tmp805 = tmp803 * tmp804
tmp806 = tmp805 * tmp9
tmp808 = tmp806 * tmp807
tmp810 = tmp807 * tmp809
tmp811 = tmp808 - tmp810
tmp812 = tmp799 + tmp811
tmp813 = 50331648 + x2
tmp814 = tl.rand(tmp1, tmp813)
tmp815 = tmp814 > tmp4
tmp816 = tmp815.to(tl.float32)
tmp818 = tmp816 * tmp817
tmp819 = tmp818 * tmp9
tmp821 = tmp819 * tmp820
tmp823 = tmp820 * tmp822
tmp824 = tmp821 - tmp823
tmp825 = tmp812 + tmp824
tmp826 = 41943040 + x2
tmp827 = tl.rand(tmp1, tmp826)
tmp828 = tmp827 > tmp4
tmp829 = tmp828.to(tl.float32)
tmp831 = tmp829 * tmp830
tmp832 = tmp831 * tmp9
tmp834 = tmp832 * tmp833
tmp836 = tmp833 * tmp835
tmp837 = tmp834 - tmp836
tmp838 = tmp825 + tmp837
tmp839 = 33554432 + x2
tmp840 = tl.rand(tmp1, tmp839)
tmp841 = tmp840 > tmp4
tmp842 = tmp841.to(tl.float32)
tmp844 = tmp842 * tmp843
tmp845 = tmp844 * tmp9
tmp847 = tmp845 * tmp846
tmp849 = tmp846 * tmp848
tmp850 = tmp847 - tmp849
tmp851 = tmp838 + tmp850
tmp852 = 25165824 + x2
tmp853 = tl.rand(tmp1, tmp852)
tmp854 = tmp853 > tmp4
tmp855 = tmp854.to(tl.float32)
tmp857 = tmp855 * tmp856
tmp858 = tmp857 * tmp9
tmp860 = tmp858 * tmp859
tmp862 = tmp859 * tmp861
tmp863 = tmp860 - tmp862
tmp864 = tmp851 + tmp863
tmp865 = 16777216 + x2
tmp866 = tl.rand(tmp1, tmp865)
tmp867 = tmp866 > tmp4
tmp868 = tmp867.to(tl.float32)
tmp870 = tmp868 * tmp869
tmp871 = tmp870 * tmp9
tmp873 = tmp871 * tmp872
tmp875 = tmp872 * tmp874
tmp876 = tmp873 - tmp875
tmp877 = tmp864 + tmp876
tmp878 = 8388608 + x2
tmp879 = tl.rand(tmp1, tmp878)
tmp880 = tmp879 > tmp4
tmp881 = tmp880.to(tl.float32)
tmp883 = tmp881 * tmp882
tmp884 = tmp883 * tmp9
tmp886 = tmp884 * tmp885
tmp888 = tmp885 * tmp887
tmp889 = tmp886 - tmp888
tmp890 = tmp877 + tmp889
tmp891 = x2
tmp892 = tl.rand(tmp1, tmp891)
tmp893 = tmp892 > tmp4
tmp894 = tmp893.to(tl.float32)
tmp896 = tmp894 * tmp895
tmp897 = tmp896 * tmp9
tmp899 = tmp897 * tmp898
tmp901 = tmp898 * tmp900
tmp902 = tmp899 - tmp901
tmp903 = tmp890 + tmp902
tmp905 = 188743680 + x2
tmp906 = tl.rand(tmp1, tmp905)
tmp907 = tmp906 > tmp4
tmp908 = tmp907.to(tl.float32)
tmp910 = tmp908 * tmp909
tmp911 = tmp910 * tmp9
tmp913 = tmp911 * tmp912
tmp915 = tmp912 * tmp914
tmp916 = tmp913 - tmp915
tmp917 = tmp904 + tmp916
tmp918 = 180355072 + x2
tmp919 = tl.rand(tmp1, tmp918)
tmp920 = tmp919 > tmp4
tmp921 = tmp920.to(tl.float32)
tmp923 = tmp921 * tmp922
tmp924 = tmp923 * tmp9
tmp926 = tmp924 * tmp925
tmp928 = tmp925 * tmp927
tmp929 = tmp926 - tmp928
tmp930 = tmp917 + tmp929
tmp931 = 171966464 + x2
tmp932 = tl.rand(tmp1, tmp931)
tmp933 = tmp932 > tmp4
tmp934 = tmp933.to(tl.float32)
tmp936 = tmp934 * tmp935
tmp937 = tmp936 * tmp9
tmp939 = tmp937 * tmp938
tmp941 = tmp938 * tmp940
tmp942 = tmp939 - tmp941
tmp943 = tmp930 + tmp942
tmp944 = 163577856 + x2
tmp945 = tl.rand(tmp1, tmp944)
tmp946 = tmp945 > tmp4
tmp947 = tmp946.to(tl.float32)
tmp949 = tmp947 * tmp948
tmp950 = tmp949 * tmp9
tmp952 = tmp950 * tmp951
tmp954 = tmp951 * tmp953
tmp955 = tmp952 - tmp954
tmp956 = tmp943 + tmp955
tmp957 = 155189248 + x2
tmp958 = tl.rand(tmp1, tmp957)
tmp959 = tmp958 > tmp4
tmp960 = tmp959.to(tl.float32)
tmp962 = tmp960 * tmp961
tmp963 = tmp962 * tmp9
tmp965 = tmp963 * tmp964
tmp967 = tmp964 * tmp966
tmp968 = tmp965 - tmp967
tmp969 = tmp956 + tmp968
tmp970 = 146800640 + x2
tmp971 = tl.rand(tmp1, tmp970)
tmp972 = tmp971 > tmp4
tmp973 = tmp972.to(tl.float32)
tmp975 = tmp973 * tmp974
tmp976 = tmp975 * tmp9
tmp978 = tmp976 * tmp977
tmp980 = tmp977 * tmp979
tmp981 = tmp978 - tmp980
tmp982 = tmp969 + tmp981
tmp983 = 138412032 + x2
tmp984 = tl.rand(tmp1, tmp983)
tmp985 = tmp984 > tmp4
tmp986 = tmp985.to(tl.float32)
tmp988 = tmp986 * tmp987
tmp989 = tmp988 * tmp9
tmp991 = tmp989 * tmp990
tmp993 = tmp990 * tmp992
tmp994 = tmp991 - tmp993
tmp995 = tmp982 + tmp994
tmp996 = 130023424 + x2
tmp997 = tl.rand(tmp1, tmp996)
tmp998 = tmp997 > tmp4
tmp999 = tmp998.to(tl.float32)
tmp1001 = tmp999 * tmp1000
tmp1002 = tmp1001 * tmp9
tmp1004 = tmp1002 * tmp1003
tmp1006 = tmp1003 * tmp1005
tmp1007 = tmp1004 - tmp1006
tmp1008 = tmp995 + tmp1007
tmp1009 = 121634816 + x2
tmp1010 = tl.rand(tmp1, tmp1009)
tmp1011 = tmp1010 > tmp4
tmp1012 = tmp1011.to(tl.float32)
tmp1014 = tmp1012 * tmp1013
tmp1015 = tmp1014 * tmp9
tmp1017 = tmp1015 * tmp1016
tmp1019 = tmp1016 * tmp1018
tmp1020 = tmp1017 - tmp1019
tmp1021 = tmp1008 + tmp1020
tmp1022 = 113246208 + x2
tmp1023 = tl.rand(tmp1, tmp1022)
tmp1024 = tmp1023 > tmp4
tmp1025 = tmp1024.to(tl.float32)
tmp1027 = tmp1025 * tmp1026
tmp1028 = tmp1027 * tmp9
tmp1030 = tmp1028 * tmp1029
tmp1032 = tmp1029 * tmp1031
tmp1033 = tmp1030 - tmp1032
tmp1034 = tmp1021 + tmp1033
tmp1035 = 104857600 + x2
tmp1036 = tl.rand(tmp1, tmp1035)
tmp1037 = tmp1036 > tmp4
tmp1038 = tmp1037.to(tl.float32)
tmp1040 = tmp1038 * tmp1039
tmp1041 = tmp1040 * tmp9
tmp1043 = tmp1041 * tmp1042
tmp1045 = tmp1042 * tmp1044
tmp1046 = tmp1043 - tmp1045
tmp1047 = tmp1034 + tmp1046
tmp1048 = 96468992 + x2
tmp1049 = tl.rand(tmp1, tmp1048)
tmp1050 = tmp1049 > tmp4
tmp1051 = tmp1050.to(tl.float32)
tmp1053 = tmp1051 * tmp1052
tmp1054 = tmp1053 * tmp9
tmp1056 = tmp1054 * tmp1055
tmp1058 = tmp1055 * tmp1057
tmp1059 = tmp1056 - tmp1058
tmp1060 = tmp1047 + tmp1059
tmp1061 = 88080384 + x2
tmp1062 = tl.rand(tmp1, tmp1061)
tmp1063 = tmp1062 > tmp4
tmp1064 = tmp1063.to(tl.float32)
tmp1066 = tmp1064 * tmp1065
tmp1067 = tmp1066 * tmp9
tmp1069 = tmp1067 * tmp1068
tmp1071 = tmp1068 * tmp1070
tmp1072 = tmp1069 - tmp1071
tmp1073 = tmp1060 + tmp1072
tmp1074 = 79691776 + x2
tmp1075 = tl.rand(tmp1, tmp1074)
tmp1076 = tmp1075 > tmp4
tmp1077 = tmp1076.to(tl.float32)
tmp1079 = tmp1077 * tmp1078
tmp1080 = tmp1079 * tmp9
tmp1082 = tmp1080 * tmp1081
tmp1084 = tmp1081 * tmp1083
tmp1085 = tmp1082 - tmp1084
tmp1086 = tmp1073 + tmp1085
tmp1087 = 71303168 + x2
tmp1088 = tl.rand(tmp1, tmp1087)
tmp1089 = tmp1088 > tmp4
tmp1090 = tmp1089.to(tl.float32)
tmp1092 = tmp1090 * tmp1091
tmp1093 = tmp1092 * tmp9
tmp1095 = tmp1093 * tmp1094
tmp1097 = tmp1094 * tmp1096
tmp1098 = tmp1095 - tmp1097
tmp1099 = tmp1086 + tmp1098
tmp1100 = 62914560 + x2
tmp1101 = tl.rand(tmp1, tmp1100)
tmp1102 = tmp1101 > tmp4
tmp1103 = tmp1102.to(tl.float32)
tmp1105 = tmp1103 * tmp1104
tmp1106 = tmp1105 * tmp9
tmp1108 = tmp1106 * tmp1107
tmp1110 = tmp1107 * tmp1109
tmp1111 = tmp1108 - tmp1110
tmp1112 = tmp1099 + tmp1111
tmp1113 = 54525952 + x2
tmp1114 = tl.rand(tmp1, tmp1113)
tmp1115 = tmp1114 > tmp4
tmp1116 = tmp1115.to(tl.float32)
tmp1118 = tmp1116 * tmp1117
tmp1119 = tmp1118 * tmp9
tmp1121 = tmp1119 * tmp1120
tmp1123 = tmp1120 * tmp1122
tmp1124 = tmp1121 - tmp1123
tmp1125 = tmp1112 + tmp1124
tmp1126 = 46137344 + x2
tmp1127 = tl.rand(tmp1, tmp1126)
tmp1128 = tmp1127 > tmp4
tmp1129 = tmp1128.to(tl.float32)
tmp1131 = tmp1129 * tmp1130
tmp1132 = tmp1131 * tmp9
tmp1134 = tmp1132 * tmp1133
tmp1136 = tmp1133 * tmp1135
tmp1137 = tmp1134 - tmp1136
tmp1138 = tmp1125 + tmp1137
tmp1139 = 37748736 + x2
tmp1140 = tl.rand(tmp1, tmp1139)
tmp1141 = tmp1140 > tmp4
tmp1142 = tmp1141.to(tl.float32)
tmp1144 = tmp1142 * tmp1143
tmp1145 = tmp1144 * tmp9
tmp1147 = tmp1145 * tmp1146
tmp1149 = tmp1146 * tmp1148
tmp1150 = tmp1147 - tmp1149
tmp1151 = tmp1138 + tmp1150
tmp1152 = 29360128 + x2
tmp1153 = tl.rand(tmp1, tmp1152)
tmp1154 = tmp1153 > tmp4
tmp1155 = tmp1154.to(tl.float32)
tmp1157 = tmp1155 * tmp1156
tmp1158 = tmp1157 * tmp9
tmp1160 = tmp1158 * tmp1159
tmp1162 = tmp1159 * tmp1161
tmp1163 = tmp1160 - tmp1162
tmp1164 = tmp1151 + tmp1163
tmp1165 = 20971520 + x2
tmp1166 = tl.rand(tmp1, tmp1165)
tmp1167 = tmp1166 > tmp4
tmp1168 = tmp1167.to(tl.float32)
tmp1170 = tmp1168 * tmp1169
tmp1171 = tmp1170 * tmp9
tmp1173 = tmp1171 * tmp1172
tmp1175 = tmp1172 * tmp1174
tmp1176 = tmp1173 - tmp1175
tmp1177 = tmp1164 + tmp1176
tmp1178 = 12582912 + x2
tmp1179 = tl.rand(tmp1, tmp1178)
tmp1180 = tmp1179 > tmp4
tmp1181 = tmp1180.to(tl.float32)
tmp1183 = tmp1181 * tmp1182
tmp1184 = tmp1183 * tmp9
tmp1186 = tmp1184 * tmp1185
tmp1188 = tmp1185 * tmp1187
tmp1189 = tmp1186 - tmp1188
tmp1190 = tmp1177 + tmp1189
tmp1191 = 4194304 + x2
tmp1192 = tl.rand(tmp1, tmp1191)
tmp1193 = tmp1192 > tmp4
tmp1194 = tmp1193.to(tl.float32)
tmp1196 = tmp1194 * tmp1195
tmp1197 = tmp1196 * tmp9
tmp1199 = tmp1197 * tmp1198
tmp1201 = tmp1198 * tmp1200
tmp1202 = tmp1199 - tmp1201
tmp1203 = tmp1190 + tmp1202
tmp1204 = tmp903 + tmp1203
tl.store(out_ptr0 + (x2 + tl.zeros([XBLOCK], tl.int32)), tmp603, xmask)
tl.store(out_ptr1 + (x2 + tl.zeros([XBLOCK], tl.int32)), tmp1204, xmask)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment