Skip to content

Instantly share code, notes, and snippets.

@BtbN
Created November 3, 2020 20:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BtbN/3ddad5d136208c45badd75f994cf7a70 to your computer and use it in GitHub Desktop.
// Subsample_Bicubic_uchar — machine-generated PTX (compiler output) for a
// CUDA kernel. Each thread computes one bicubic-filtered sample from a 2-D
// texture and stores it as a single u8 into global memory.
//
// Parameter/register roles, as established by the code below:
//   %rd1 = param_0: texture handle used by all 16 tex.2d fetches
//   %rd2 = param_1: destination pointer (generic; cvta'd to global space)
//   %r3  = param_2: x bound of the early-exit guard — presumably dst width
//   %r4  = param_3: y bound of the early-exit guard — presumably dst height
//   %r5  = param_4: dst row stride in elements (linear index = y*%r5 + x)
//   %r6  = param_5: numerator of the x scale factor — presumably src width
//   %r7  = param_6: numerator of the y scale factor — presumably src height
//   %r8  = param_7: selects output scale 65535.0 when >8, else 255.0
//                   (NOTE(review): looks like a bit-depth selector — confirm)
.visible .entry Subsample_Bicubic_uchar(
.param .u64 Subsample_Bicubic_uchar_param_0,
.param .u64 Subsample_Bicubic_uchar_param_1,
.param .u32 Subsample_Bicubic_uchar_param_2,
.param .u32 Subsample_Bicubic_uchar_param_3,
.param .u32 Subsample_Bicubic_uchar_param_4,
.param .u32 Subsample_Bicubic_uchar_param_5,
.param .u32 Subsample_Bicubic_uchar_param_6,
.param .u32 Subsample_Bicubic_uchar_param_7
)
{
.reg .pred %p<5>;
.reg .f32 %f<138>;
.reg .b32 %r<17>;
.reg .b64 %rd<6>;
// Load all kernel parameters.
ld.param.u64 %rd1, [Subsample_Bicubic_uchar_param_0];
ld.param.u64 %rd2, [Subsample_Bicubic_uchar_param_1];
ld.param.u32 %r3, [Subsample_Bicubic_uchar_param_2];
ld.param.u32 %r4, [Subsample_Bicubic_uchar_param_3];
ld.param.u32 %r5, [Subsample_Bicubic_uchar_param_4];
ld.param.u32 %r6, [Subsample_Bicubic_uchar_param_5];
ld.param.u32 %r7, [Subsample_Bicubic_uchar_param_6];
ld.param.u32 %r8, [Subsample_Bicubic_uchar_param_7];
// Global thread coordinates: x = ntid.x*ctaid.x + tid.x, y likewise.
mov.u32 %r9, %ctaid.x;
mov.u32 %r10, %ntid.x;
mov.u32 %r11, %tid.x;
mad.lo.s32 %r1, %r10, %r9, %r11;        // %r1 = output x
mov.u32 %r12, %ntid.y;
mov.u32 %r13, %ctaid.y;
mov.u32 %r14, %tid.y;
mad.lo.s32 %r2, %r12, %r13, %r14;       // %r2 = output y
// Guard: threads outside [0,%r3) x [0,%r4) do nothing.
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p1, %p2;
@%p3 bra BB0_2;
// Scale factors: %f3 = param_5/param_2 (x), %f6 = param_6/param_3 (y).
cvt.rn.f32.s32 %f1, %r6;
cvt.rn.f32.s32 %f2, %r3;
div.rn.f32 %f3, %f1, %f2;
cvt.rn.f32.s32 %f4, %r4;
cvt.rn.f32.s32 %f5, %r7;
div.rn.f32 %f6, %f5, %f4;
// Map output pixel center into source space: src = (dst + 0.5)*scale - 0.5.
cvt.rn.f32.s32 %f7, %r1;
add.f32 %f8, %f7, 0f3F000000;           // x + 0.5
fma.rn.f32 %f9, %f8, %f3, 0fBF000000;   // %f9 = source x
cvt.rn.f32.s32 %f10, %r2;
add.f32 %f11, %f10, 0f3F000000;         // y + 0.5
fma.rn.f32 %f12, %f11, %f6, 0fBF000000; // %f12 = source y
cvt.rmi.f32.f32 %f13, %f9;              // fx = floor(source x)
cvt.rmi.f32.f32 %f14, %f12;             // fy = floor(source y)
// 4x4 tap footprint: columns fx-1, fx, fx+1, fx+2; rows fy-1 .. fy+2.
add.f32 %f15, %f13, 0fBF800000;         // fx - 1
add.f32 %f16, %f14, 0fBF800000;         // fy - 1
// Row fy-1: fetch four taps (only the first component of each texel is used).
tex.2d.v4.f32.f32 {%f17, %f18, %f19, %f20}, [%rd1, {%f15, %f16}];
tex.2d.v4.f32.f32 {%f21, %f22, %f23, %f24}, [%rd1, {%f13, %f16}];
add.f32 %f25, %f13, 0f3F800000;         // fx + 1
tex.2d.v4.f32.f32 {%f26, %f27, %f28, %f29}, [%rd1, {%f25, %f16}];
add.f32 %f30, %f13, 0f40000000;         // fx + 2
tex.2d.v4.f32.f32 {%f31, %f32, %f33, %f34}, [%rd1, {%f30, %f16}];
// Horizontal weights from t = frac(source x).
// Constants decode to -0.75, 3.75, 1.25, -2.25, -6.0, 3.0 — consistent with
// the common a = -0.75 bicubic convolution kernel; TODO confirm vs. source.
sub.f32 %f35, %f9, %f13;                // t
add.f32 %f36, %f35, 0f3F800000;         // t + 1
fma.rn.f32 %f37, %f36, 0fBF400000, 0f40700000;
mov.f32 %f38, 0f3F800000;               // constant 1.0
sub.f32 %f39, %f38, %f35;               // 1 - t
fma.rn.f32 %f40, %f35, 0f3FA00000, 0fC0100000;
fma.rn.f32 %f41, %f36, %f37, 0fC0C00000;
mul.f32 %f42, %f35, %f40;
fma.rn.f32 %f43, %f39, 0f3FA00000, 0fC0100000;
fma.rn.f32 %f44, %f36, %f41, 0f40400000;  // w0: weight for tap at fx-1
mul.f32 %f45, %f39, %f43;
fma.rn.f32 %f46, %f35, %f42, 0f3F800000;  // w1: weight for tap at fx
sub.f32 %f47, %f38, %f44;
sub.f32 %f48, %f47, %f46;
fma.rn.f32 %f49, %f39, %f45, 0f3F800000;  // w2: weight for tap at fx+1
sub.f32 %f50, %f48, %f49;                 // w3 = 1 - w0 - w1 - w2 (tap fx+2)
// Horizontal reduction of row fy-1.
mul.f32 %f51, %f21, %f46;
fma.rn.f32 %f52, %f17, %f44, %f51;
fma.rn.f32 %f53, %f26, %f49, %f52;
fma.rn.f32 %f54, %f31, %f50, %f53;        // %f54 = row sum at fy-1
// Row fy: fetch and reduce.
tex.2d.v4.f32.f32 {%f55, %f56, %f57, %f58}, [%rd1, {%f15, %f14}];
tex.2d.v4.f32.f32 {%f59, %f60, %f61, %f62}, [%rd1, {%f13, %f14}];
tex.2d.v4.f32.f32 {%f63, %f64, %f65, %f66}, [%rd1, {%f25, %f14}];
tex.2d.v4.f32.f32 {%f67, %f68, %f69, %f70}, [%rd1, {%f30, %f14}];
mul.f32 %f71, %f59, %f46;
fma.rn.f32 %f72, %f55, %f44, %f71;
fma.rn.f32 %f73, %f63, %f49, %f72;
fma.rn.f32 %f74, %f67, %f50, %f73;        // %f74 = row sum at fy
add.f32 %f75, %f14, 0f3F800000;           // fy + 1
// Row fy+1: fetch and reduce.
tex.2d.v4.f32.f32 {%f76, %f77, %f78, %f79}, [%rd1, {%f15, %f75}];
tex.2d.v4.f32.f32 {%f80, %f81, %f82, %f83}, [%rd1, {%f13, %f75}];
tex.2d.v4.f32.f32 {%f84, %f85, %f86, %f87}, [%rd1, {%f25, %f75}];
tex.2d.v4.f32.f32 {%f88, %f89, %f90, %f91}, [%rd1, {%f30, %f75}];
mul.f32 %f92, %f80, %f46;
fma.rn.f32 %f93, %f76, %f44, %f92;
fma.rn.f32 %f94, %f84, %f49, %f93;
fma.rn.f32 %f95, %f50, %f88, %f94;        // %f95 = row sum at fy+1
add.f32 %f96, %f14, 0f40000000;           // fy + 2
// Row fy+2: fetch (reduced further below, after the vertical weights).
tex.2d.v4.f32.f32 {%f97, %f98, %f99, %f100}, [%rd1, {%f15, %f96}];
tex.2d.v4.f32.f32 {%f101, %f102, %f103, %f104}, [%rd1, {%f13, %f96}];
tex.2d.v4.f32.f32 {%f105, %f106, %f107, %f108}, [%rd1, {%f25, %f96}];
tex.2d.v4.f32.f32 {%f109, %f110, %f111, %f112}, [%rd1, {%f30, %f96}];
// Vertical weights from u = frac(source y); same polynomial as above.
sub.f32 %f113, %f12, %f14;                // u
add.f32 %f114, %f113, 0f3F800000;         // u + 1
fma.rn.f32 %f115, %f114, 0fBF400000, 0f40700000;
sub.f32 %f116, %f38, %f113;               // 1 - u
fma.rn.f32 %f117, %f113, 0f3FA00000, 0fC0100000;
fma.rn.f32 %f118, %f114, %f115, 0fC0C00000;
mul.f32 %f119, %f113, %f117;
fma.rn.f32 %f120, %f116, 0f3FA00000, 0fC0100000;
fma.rn.f32 %f121, %f114, %f118, 0f40400000;  // v0: weight for row fy-1
mul.f32 %f122, %f116, %f120;
fma.rn.f32 %f123, %f113, %f119, 0f3F800000;  // v1: weight for row fy
sub.f32 %f124, %f38, %f121;
sub.f32 %f125, %f124, %f123;
fma.rn.f32 %f126, %f116, %f122, 0f3F800000;  // v2: weight for row fy+1
sub.f32 %f127, %f125, %f126;                 // v3 = 1 - v0 - v1 - v2
// Output scale: 65535.0 if param_7 > 8 else 255.0 (presumably mapping a
// normalized-float texture value back to the integer sample range — confirm).
setp.gt.s32 %p4, %r8, 8;
selp.f32 %f128, 0f477FFF00, 0f437F0000, %p4;
// Horizontal reduction of row fy+2.
mul.f32 %f129, %f46, %f101;
fma.rn.f32 %f130, %f44, %f97, %f129;
fma.rn.f32 %f131, %f49, %f105, %f130;
fma.rn.f32 %f132, %f50, %f109, %f131;     // %f132 = row sum at fy+2
// Vertical reduction across the four row sums.
mul.f32 %f133, %f123, %f74;
fma.rn.f32 %f134, %f121, %f54, %f133;
fma.rn.f32 %f135, %f126, %f95, %f134;
fma.rn.f32 %f136, %f127, %f132, %f135;
mul.f32 %f137, %f128, %f136;              // scale to integer range
cvt.rzi.u32.f32 %r15, %f137;              // truncate toward zero to u32
// Store the low byte at dst[y*%r5 + x].
// NOTE(review): only 8 bits are stored even when the 65535 scale is selected;
// presumably this is the uchar-specialized variant — confirm upstream source.
mad.lo.s32 %r16, %r2, %r5, %r1;
cvta.to.global.u64 %rd3, %rd2;
cvt.s64.s32 %rd4, %r16;
add.s64 %rd5, %rd3, %rd4;
st.global.u8 [%rd5], %r15;
BB0_2:
ret;
}
// .globl Subsample_Lanczos_uchar
.visible .entry Subsample_Lanczos_uchar(
.param .u64 Subsample_Lanczos_uchar_param_0,
.param .u64 Subsample_Lanczos_uchar_param_1,
.param .u32 Subsample_Lanczos_uchar_param_2,
.param .u32 Subsample_Lanczos_uchar_param_3,
.param .u32 Subsample_Lanczos_uchar_param_4,
.param .u32 Subsample_Lanczos_uchar_param_5,
.param .u32 Subsample_Lanczos_uchar_param_6,
.param .u32 Subsample_Lanczos_uchar_param_7
)
{
.local .align 4 .b8 __local_depot6[28];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<221>;
.reg .f32 %f<953>;
.reg .b32 %r<1480>;
.reg .b64 %rd<179>;
mov.u64 %SPL, __local_depot6;
cvta.local.u64 %SP, %SPL;
ld.param.u64 %rd97, [Subsample_Lanczos_uchar_param_0];
ld.param.u64 %rd98, [Subsample_Lanczos_uchar_param_1];
ld.param.u32 %r562, [Subsample_Lanczos_uchar_param_2];
ld.param.u32 %r563, [Subsample_Lanczos_uchar_param_3];
ld.param.u32 %r564, [Subsample_Lanczos_uchar_param_4];
ld.param.u32 %r565, [Subsample_Lanczos_uchar_param_5];
ld.param.u32 %r566, [Subsample_Lanczos_uchar_param_6];
ld.param.u32 %r567, [Subsample_Lanczos_uchar_param_7];
mov.u32 %r568, %ntid.x;
mov.u32 %r569, %ctaid.x;
mov.u32 %r570, %tid.x;
mad.lo.s32 %r1, %r568, %r569, %r570;
mov.u32 %r571, %ntid.y;
mov.u32 %r572, %ctaid.y;
mov.u32 %r573, %tid.y;
mad.lo.s32 %r2, %r571, %r572, %r573;
setp.ge.s32 %p1, %r2, %r563;
setp.ge.s32 %p2, %r1, %r562;
or.pred %p3, %p1, %p2;
@%p3 bra BB6_386;
cvt.rn.f32.s32 %f309, %r565;
cvt.rn.f32.s32 %f310, %r562;
div.rn.f32 %f311, %f309, %f310;
cvt.rn.f32.s32 %f312, %r563;
cvt.rn.f32.s32 %f313, %r566;
div.rn.f32 %f314, %f313, %f312;
cvt.rn.f32.s32 %f315, %r1;
add.f32 %f316, %f315, 0f3F000000;
fma.rn.f32 %f317, %f316, %f311, 0fBF000000;
cvt.rn.f32.s32 %f318, %r2;
add.f32 %f319, %f318, 0f3F000000;
fma.rn.f32 %f1, %f319, %f314, 0fBF000000;
cvt.rmi.f32.f32 %f2, %f317;
cvt.rmi.f32.f32 %f3, %f1;
sub.f32 %f320, %f317, %f2;
add.f32 %f321, %f320, 0f3F800000;
mul.f32 %f4, %f321, 0f40490FDB;
mul.f32 %f5, %f320, 0f40490FDB;
add.f32 %f322, %f320, 0fBF800000;
mul.f32 %f6, %f322, 0f40490FDB;
add.f32 %f323, %f320, 0fC0000000;
mul.f32 %f7, %f323, 0f40490FDB;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f952, 0f3F800000;
mov.f32 %f861, %f952;
@%p4 bra BB6_49;
abs.f32 %f324, %f4;
setp.neu.f32 %p5, %f324, 0f7F800000;
mov.f32 %f849, %f4;
@%p5 bra BB6_4;
mov.f32 %f325, 0f00000000;
mul.rn.f32 %f849, %f4, %f325;
BB6_4:
mul.f32 %f326, %f849, 0f3F22F983;
cvt.rni.s32.f32 %r1329, %f326;
cvt.rn.f32.s32 %f327, %r1329;
neg.f32 %f328, %f327;
mov.f32 %f329, 0f3FC90FDA;
fma.rn.f32 %f330, %f328, %f329, %f849;
mov.f32 %f331, 0f33A22168;
fma.rn.f32 %f332, %f328, %f331, %f330;
mov.f32 %f333, 0f27C234C5;
fma.rn.f32 %f850, %f328, %f333, %f332;
abs.f32 %f334, %f849;
setp.leu.f32 %p6, %f334, 0f47CE4780;
@%p6 bra BB6_15;
mov.b32 %r4, %f849;
shl.b32 %r576, %r4, 8;
or.b32 %r5, %r576, -2147483648;
add.u64 %rd100, %SP, 0;
add.u64 %rd148, %SPL, 0;
mov.u32 %r1321, 0;
mov.u64 %rd147, __cudart_i2opi_f;
mov.u32 %r1320, -6;
BB6_6:
.pragma "nounroll";
ld.const.u32 %r579, [%rd147];
// inline asm
{
mad.lo.cc.u32 %r577, %r579, %r5, %r1321;
madc.hi.u32 %r1321, %r579, %r5, 0;
}
// inline asm
st.local.u32 [%rd148], %r577;
add.s64 %rd148, %rd148, 4;
add.s64 %rd147, %rd147, 4;
add.s32 %r1320, %r1320, 1;
setp.ne.s32 %p7, %r1320, 0;
@%p7 bra BB6_6;
bfe.u32 %r582, %r4, 23, 8;
add.s32 %r583, %r582, -128;
shr.u32 %r584, %r583, 5;
and.b32 %r10, %r4, -2147483648;
cvta.to.local.u64 %rd102, %rd100;
st.local.u32 [%rd102+24], %r1321;
bfe.u32 %r11, %r4, 23, 5;
mov.u32 %r585, 6;
sub.s32 %r586, %r585, %r584;
mul.wide.s32 %rd103, %r586, 4;
add.s64 %rd6, %rd102, %rd103;
ld.local.u32 %r1322, [%rd6];
ld.local.u32 %r1323, [%rd6+-4];
setp.eq.s32 %p8, %r11, 0;
@%p8 bra BB6_9;
mov.u32 %r587, 32;
sub.s32 %r588, %r587, %r11;
shr.u32 %r589, %r1323, %r588;
shl.b32 %r590, %r1322, %r11;
add.s32 %r1322, %r589, %r590;
ld.local.u32 %r591, [%rd6+-8];
shr.u32 %r592, %r591, %r588;
shl.b32 %r593, %r1323, %r11;
add.s32 %r1323, %r592, %r593;
BB6_9:
shr.u32 %r594, %r1323, 30;
shl.b32 %r595, %r1322, 2;
add.s32 %r1324, %r594, %r595;
shl.b32 %r19, %r1323, 2;
shr.u32 %r596, %r1324, 31;
shr.u32 %r597, %r1322, 30;
add.s32 %r20, %r596, %r597;
setp.eq.s32 %p9, %r596, 0;
@%p9 bra BB6_10;
not.b32 %r598, %r1324;
neg.s32 %r1326, %r19;
setp.eq.s32 %p10, %r19, 0;
selp.u32 %r599, 1, 0, %p10;
add.s32 %r1324, %r599, %r598;
xor.b32 %r1325, %r10, -2147483648;
bra.uni BB6_12;
BB6_10:
mov.u32 %r1325, %r10;
mov.u32 %r1326, %r19;
BB6_12:
clz.b32 %r1328, %r1324;
setp.eq.s32 %p11, %r1328, 0;
shl.b32 %r600, %r1324, %r1328;
mov.u32 %r601, 32;
sub.s32 %r602, %r601, %r1328;
shr.u32 %r603, %r1326, %r602;
add.s32 %r604, %r603, %r600;
selp.b32 %r28, %r1324, %r604, %p11;
mov.u32 %r605, -921707870;
mul.hi.u32 %r1327, %r28, %r605;
setp.eq.s32 %p12, %r10, 0;
neg.s32 %r606, %r20;
selp.b32 %r1329, %r20, %r606, %p12;
setp.lt.s32 %p13, %r1327, 1;
@%p13 bra BB6_14;
mul.lo.s32 %r607, %r28, -921707870;
shr.u32 %r608, %r607, 31;
shl.b32 %r609, %r1327, 1;
add.s32 %r1327, %r608, %r609;
add.s32 %r1328, %r1328, 1;
BB6_14:
mov.u32 %r610, 126;
sub.s32 %r611, %r610, %r1328;
shl.b32 %r612, %r611, 23;
add.s32 %r613, %r1327, 1;
shr.u32 %r614, %r613, 7;
add.s32 %r615, %r614, 1;
shr.u32 %r616, %r615, 1;
add.s32 %r617, %r616, %r612;
or.b32 %r618, %r617, %r1325;
mov.b32 %f850, %r618;
BB6_15:
mul.rn.f32 %f13, %f850, %f850;
and.b32 %r36, %r1329, 1;
setp.eq.s32 %p14, %r36, 0;
@%p14 bra BB6_17;
mov.f32 %f335, 0fBAB6061A;
mov.f32 %f336, 0f37CCF5CE;
fma.rn.f32 %f851, %f336, %f13, %f335;
bra.uni BB6_18;
BB6_17:
mov.f32 %f337, 0f3C08839E;
mov.f32 %f338, 0fB94CA1F9;
fma.rn.f32 %f851, %f338, %f13, %f337;
BB6_18:
@%p14 bra BB6_20;
mov.f32 %f339, 0f3D2AAAA5;
fma.rn.f32 %f340, %f851, %f13, %f339;
mov.f32 %f341, 0fBF000000;
fma.rn.f32 %f852, %f340, %f13, %f341;
bra.uni BB6_21;
BB6_20:
mov.f32 %f342, 0fBE2AAAA3;
fma.rn.f32 %f343, %f851, %f13, %f342;
mov.f32 %f344, 0f00000000;
fma.rn.f32 %f852, %f343, %f13, %f344;
BB6_21:
fma.rn.f32 %f853, %f852, %f850, %f850;
@%p14 bra BB6_23;
mov.f32 %f345, 0f3F800000;
fma.rn.f32 %f853, %f852, %f13, %f345;
BB6_23:
and.b32 %r619, %r1329, 2;
setp.eq.s32 %p17, %r619, 0;
@%p17 bra BB6_25;
mov.f32 %f346, 0f00000000;
mov.f32 %f347, 0fBF800000;
fma.rn.f32 %f853, %f853, %f347, %f346;
BB6_25:
mul.f32 %f855, %f4, 0f3F000000;
abs.f32 %f348, %f855;
setp.neu.f32 %p18, %f348, 0f7F800000;
@%p18 bra BB6_27;
mov.f32 %f349, 0f00000000;
mul.rn.f32 %f855, %f855, %f349;
BB6_27:
mul.f32 %f350, %f855, 0f3F22F983;
cvt.rni.s32.f32 %r1339, %f350;
cvt.rn.f32.s32 %f351, %r1339;
neg.f32 %f352, %f351;
fma.rn.f32 %f354, %f352, %f329, %f855;
fma.rn.f32 %f356, %f352, %f331, %f354;
fma.rn.f32 %f856, %f352, %f333, %f356;
abs.f32 %f358, %f855;
setp.leu.f32 %p19, %f358, 0f47CE4780;
@%p19 bra BB6_38;
mov.b32 %r38, %f855;
shr.u32 %r39, %r38, 23;
shl.b32 %r622, %r38, 8;
or.b32 %r40, %r622, -2147483648;
add.u64 %rd105, %SP, 0;
add.u64 %rd150, %SPL, 0;
mov.u32 %r1331, 0;
mov.u64 %rd149, __cudart_i2opi_f;
mov.u32 %r1330, -6;
BB6_29:
.pragma "nounroll";
ld.const.u32 %r625, [%rd149];
// inline asm
{
mad.lo.cc.u32 %r623, %r625, %r40, %r1331;
madc.hi.u32 %r1331, %r625, %r40, 0;
}
// inline asm
st.local.u32 [%rd150], %r623;
add.s64 %rd150, %rd150, 4;
add.s64 %rd149, %rd149, 4;
add.s32 %r1330, %r1330, 1;
setp.ne.s32 %p20, %r1330, 0;
@%p20 bra BB6_29;
and.b32 %r628, %r39, 255;
add.s32 %r629, %r628, -128;
shr.u32 %r630, %r629, 5;
and.b32 %r45, %r38, -2147483648;
cvta.to.local.u64 %rd107, %rd105;
st.local.u32 [%rd107+24], %r1331;
mov.u32 %r631, 6;
sub.s32 %r632, %r631, %r630;
mul.wide.s32 %rd108, %r632, 4;
add.s64 %rd12, %rd107, %rd108;
ld.local.u32 %r1332, [%rd12];
ld.local.u32 %r1333, [%rd12+-4];
and.b32 %r48, %r39, 31;
setp.eq.s32 %p21, %r48, 0;
@%p21 bra BB6_32;
mov.u32 %r633, 32;
sub.s32 %r634, %r633, %r48;
shr.u32 %r635, %r1333, %r634;
shl.b32 %r636, %r1332, %r48;
add.s32 %r1332, %r635, %r636;
ld.local.u32 %r637, [%rd12+-8];
shr.u32 %r638, %r637, %r634;
shl.b32 %r639, %r1333, %r48;
add.s32 %r1333, %r638, %r639;
BB6_32:
shr.u32 %r640, %r1333, 30;
shl.b32 %r641, %r1332, 2;
add.s32 %r1334, %r640, %r641;
shl.b32 %r54, %r1333, 2;
shr.u32 %r642, %r1334, 31;
shr.u32 %r643, %r1332, 30;
add.s32 %r55, %r642, %r643;
setp.eq.s32 %p22, %r642, 0;
@%p22 bra BB6_33;
not.b32 %r644, %r1334;
neg.s32 %r1336, %r54;
setp.eq.s32 %p23, %r54, 0;
selp.u32 %r645, 1, 0, %p23;
add.s32 %r1334, %r645, %r644;
xor.b32 %r1335, %r45, -2147483648;
bra.uni BB6_35;
BB6_33:
mov.u32 %r1335, %r45;
mov.u32 %r1336, %r54;
BB6_35:
clz.b32 %r1338, %r1334;
setp.eq.s32 %p24, %r1338, 0;
shl.b32 %r646, %r1334, %r1338;
mov.u32 %r647, 32;
sub.s32 %r648, %r647, %r1338;
shr.u32 %r649, %r1336, %r648;
add.s32 %r650, %r649, %r646;
selp.b32 %r63, %r1334, %r650, %p24;
mov.u32 %r651, -921707870;
mul.hi.u32 %r1337, %r63, %r651;
setp.eq.s32 %p25, %r45, 0;
neg.s32 %r652, %r55;
selp.b32 %r1339, %r55, %r652, %p25;
setp.lt.s32 %p26, %r1337, 1;
@%p26 bra BB6_37;
mul.lo.s32 %r653, %r63, -921707870;
shr.u32 %r654, %r653, 31;
shl.b32 %r655, %r1337, 1;
add.s32 %r1337, %r654, %r655;
add.s32 %r1338, %r1338, 1;
BB6_37:
mov.u32 %r656, 126;
sub.s32 %r657, %r656, %r1338;
shl.b32 %r658, %r657, 23;
add.s32 %r659, %r1337, 1;
shr.u32 %r660, %r659, 7;
add.s32 %r661, %r660, 1;
shr.u32 %r662, %r661, 1;
add.s32 %r663, %r662, %r658;
or.b32 %r664, %r663, %r1335;
mov.b32 %f856, %r664;
BB6_38:
mul.rn.f32 %f31, %f856, %f856;
and.b32 %r71, %r1339, 1;
setp.eq.s32 %p27, %r71, 0;
@%p27 bra BB6_40;
mov.f32 %f359, 0fBAB6061A;
mov.f32 %f360, 0f37CCF5CE;
fma.rn.f32 %f857, %f360, %f31, %f359;
bra.uni BB6_41;
BB6_40:
mov.f32 %f361, 0f3C08839E;
mov.f32 %f362, 0fB94CA1F9;
fma.rn.f32 %f857, %f362, %f31, %f361;
BB6_41:
@%p27 bra BB6_43;
mov.f32 %f363, 0f3D2AAAA5;
fma.rn.f32 %f364, %f857, %f31, %f363;
mov.f32 %f365, 0fBF000000;
fma.rn.f32 %f858, %f364, %f31, %f365;
bra.uni BB6_44;
BB6_43:
mov.f32 %f366, 0fBE2AAAA3;
fma.rn.f32 %f367, %f857, %f31, %f366;
mov.f32 %f368, 0f00000000;
fma.rn.f32 %f858, %f367, %f31, %f368;
BB6_44:
fma.rn.f32 %f859, %f858, %f856, %f856;
@%p27 bra BB6_46;
mov.f32 %f369, 0f3F800000;
fma.rn.f32 %f859, %f858, %f31, %f369;
BB6_46:
and.b32 %r665, %r1339, 2;
setp.eq.s32 %p30, %r665, 0;
@%p30 bra BB6_48;
mov.f32 %f370, 0f00000000;
mov.f32 %f371, 0fBF800000;
fma.rn.f32 %f859, %f859, %f371, %f370;
BB6_48:
mul.f32 %f372, %f4, %f4;
mul.f32 %f373, %f372, 0f3F000000;
mul.f32 %f374, %f853, %f859;
div.rn.f32 %f861, %f374, %f373;
BB6_49:
setp.eq.f32 %p31, %f5, 0f00000000;
mov.f32 %f874, %f952;
@%p31 bra BB6_97;
add.u64 %rd13, %SPL, 0;
abs.f32 %f376, %f5;
setp.neu.f32 %p32, %f376, 0f7F800000;
mov.f32 %f862, %f5;
@%p32 bra BB6_52;
mov.f32 %f377, 0f00000000;
mul.rn.f32 %f862, %f5, %f377;
BB6_52:
mul.f32 %f378, %f862, 0f3F22F983;
cvt.rni.s32.f32 %r1349, %f378;
cvt.rn.f32.s32 %f379, %r1349;
neg.f32 %f380, %f379;
mov.f32 %f381, 0f3FC90FDA;
fma.rn.f32 %f382, %f380, %f381, %f862;
mov.f32 %f383, 0f33A22168;
fma.rn.f32 %f384, %f380, %f383, %f382;
mov.f32 %f385, 0f27C234C5;
fma.rn.f32 %f863, %f380, %f385, %f384;
abs.f32 %f386, %f862;
add.s64 %rd14, %rd13, 24;
setp.leu.f32 %p33, %f386, 0f47CE4780;
@%p33 bra BB6_63;
mov.b32 %r73, %f862;
shr.u32 %r74, %r73, 23;
shl.b32 %r668, %r73, 8;
or.b32 %r75, %r668, -2147483648;
mov.u32 %r1341, 0;
mov.u64 %rd151, __cudart_i2opi_f;
mov.u32 %r1340, -6;
mov.u64 %rd152, %rd13;
BB6_54:
.pragma "nounroll";
ld.const.u32 %r671, [%rd151];
// inline asm
{
mad.lo.cc.u32 %r669, %r671, %r75, %r1341;
madc.hi.u32 %r1341, %r671, %r75, 0;
}
// inline asm
st.local.u32 [%rd152], %r669;
add.s64 %rd152, %rd152, 4;
add.s64 %rd151, %rd151, 4;
add.s32 %r1340, %r1340, 1;
setp.ne.s32 %p34, %r1340, 0;
@%p34 bra BB6_54;
and.b32 %r674, %r74, 255;
add.s32 %r675, %r674, -128;
shr.u32 %r676, %r675, 5;
and.b32 %r80, %r73, -2147483648;
st.local.u32 [%rd14], %r1341;
mov.u32 %r677, 6;
sub.s32 %r678, %r677, %r676;
mul.wide.s32 %rd111, %r678, 4;
add.s64 %rd19, %rd13, %rd111;
ld.local.u32 %r1342, [%rd19];
ld.local.u32 %r1343, [%rd19+-4];
and.b32 %r83, %r74, 31;
setp.eq.s32 %p35, %r83, 0;
@%p35 bra BB6_57;
mov.u32 %r679, 32;
sub.s32 %r680, %r679, %r83;
shr.u32 %r681, %r1343, %r680;
shl.b32 %r682, %r1342, %r83;
add.s32 %r1342, %r681, %r682;
ld.local.u32 %r683, [%rd19+-8];
shr.u32 %r684, %r683, %r680;
shl.b32 %r685, %r1343, %r83;
add.s32 %r1343, %r684, %r685;
BB6_57:
shr.u32 %r686, %r1343, 30;
shl.b32 %r687, %r1342, 2;
add.s32 %r1344, %r686, %r687;
shl.b32 %r89, %r1343, 2;
shr.u32 %r688, %r1344, 31;
shr.u32 %r689, %r1342, 30;
add.s32 %r90, %r688, %r689;
setp.eq.s32 %p36, %r688, 0;
@%p36 bra BB6_58;
not.b32 %r690, %r1344;
neg.s32 %r1346, %r89;
setp.eq.s32 %p37, %r89, 0;
selp.u32 %r691, 1, 0, %p37;
add.s32 %r1344, %r691, %r690;
xor.b32 %r1345, %r80, -2147483648;
bra.uni BB6_60;
BB6_58:
mov.u32 %r1345, %r80;
mov.u32 %r1346, %r89;
BB6_60:
clz.b32 %r1348, %r1344;
setp.eq.s32 %p38, %r1348, 0;
shl.b32 %r692, %r1344, %r1348;
mov.u32 %r693, 32;
sub.s32 %r694, %r693, %r1348;
shr.u32 %r695, %r1346, %r694;
add.s32 %r696, %r695, %r692;
selp.b32 %r98, %r1344, %r696, %p38;
mov.u32 %r697, -921707870;
mul.hi.u32 %r1347, %r98, %r697;
setp.eq.s32 %p39, %r80, 0;
neg.s32 %r698, %r90;
selp.b32 %r1349, %r90, %r698, %p39;
setp.lt.s32 %p40, %r1347, 1;
@%p40 bra BB6_62;
mul.lo.s32 %r699, %r98, -921707870;
shr.u32 %r700, %r699, 31;
shl.b32 %r701, %r1347, 1;
add.s32 %r1347, %r700, %r701;
add.s32 %r1348, %r1348, 1;
BB6_62:
mov.u32 %r702, 126;
sub.s32 %r703, %r702, %r1348;
shl.b32 %r704, %r703, 23;
add.s32 %r705, %r1347, 1;
shr.u32 %r706, %r705, 7;
add.s32 %r707, %r706, 1;
shr.u32 %r708, %r707, 1;
add.s32 %r709, %r708, %r704;
or.b32 %r710, %r709, %r1345;
mov.b32 %f863, %r710;
BB6_63:
mul.rn.f32 %f50, %f863, %f863;
and.b32 %r106, %r1349, 1;
setp.eq.s32 %p41, %r106, 0;
@%p41 bra BB6_65;
mov.f32 %f387, 0fBAB6061A;
mov.f32 %f388, 0f37CCF5CE;
fma.rn.f32 %f864, %f388, %f50, %f387;
bra.uni BB6_66;
BB6_65:
mov.f32 %f389, 0f3C08839E;
mov.f32 %f390, 0fB94CA1F9;
fma.rn.f32 %f864, %f390, %f50, %f389;
BB6_66:
@%p41 bra BB6_68;
mov.f32 %f391, 0f3D2AAAA5;
fma.rn.f32 %f392, %f864, %f50, %f391;
mov.f32 %f393, 0fBF000000;
fma.rn.f32 %f865, %f392, %f50, %f393;
bra.uni BB6_69;
BB6_68:
mov.f32 %f394, 0fBE2AAAA3;
fma.rn.f32 %f395, %f864, %f50, %f394;
mov.f32 %f396, 0f00000000;
fma.rn.f32 %f865, %f395, %f50, %f396;
BB6_69:
fma.rn.f32 %f866, %f865, %f863, %f863;
@%p41 bra BB6_71;
mov.f32 %f397, 0f3F800000;
fma.rn.f32 %f866, %f865, %f50, %f397;
BB6_71:
and.b32 %r711, %r1349, 2;
setp.eq.s32 %p44, %r711, 0;
@%p44 bra BB6_73;
mov.f32 %f398, 0f00000000;
mov.f32 %f399, 0fBF800000;
fma.rn.f32 %f866, %f866, %f399, %f398;
BB6_73:
mul.f32 %f868, %f5, 0f3F000000;
abs.f32 %f400, %f868;
setp.neu.f32 %p45, %f400, 0f7F800000;
@%p45 bra BB6_75;
mov.f32 %f401, 0f00000000;
mul.rn.f32 %f868, %f868, %f401;
BB6_75:
mul.f32 %f402, %f868, 0f3F22F983;
cvt.rni.s32.f32 %r1359, %f402;
cvt.rn.f32.s32 %f403, %r1359;
neg.f32 %f404, %f403;
fma.rn.f32 %f406, %f404, %f381, %f868;
fma.rn.f32 %f408, %f404, %f383, %f406;
fma.rn.f32 %f869, %f404, %f385, %f408;
abs.f32 %f410, %f868;
setp.leu.f32 %p46, %f410, 0f47CE4780;
@%p46 bra BB6_86;
mov.b32 %r108, %f868;
shr.u32 %r109, %r108, 23;
shl.b32 %r714, %r108, 8;
or.b32 %r110, %r714, -2147483648;
mov.u32 %r1351, 0;
mov.u64 %rd153, __cudart_i2opi_f;
mov.u32 %r1350, -6;
mov.u64 %rd154, %rd13;
BB6_77:
.pragma "nounroll";
ld.const.u32 %r717, [%rd153];
// inline asm
{
mad.lo.cc.u32 %r715, %r717, %r110, %r1351;
madc.hi.u32 %r1351, %r717, %r110, 0;
}
// inline asm
st.local.u32 [%rd154], %r715;
add.s64 %rd154, %rd154, 4;
add.s64 %rd153, %rd153, 4;
add.s32 %r1350, %r1350, 1;
setp.ne.s32 %p47, %r1350, 0;
@%p47 bra BB6_77;
and.b32 %r720, %r109, 255;
add.s32 %r721, %r720, -128;
shr.u32 %r722, %r721, 5;
and.b32 %r115, %r108, -2147483648;
st.local.u32 [%rd14], %r1351;
mov.u32 %r723, 6;
sub.s32 %r724, %r723, %r722;
mul.wide.s32 %rd113, %r724, 4;
add.s64 %rd24, %rd13, %rd113;
ld.local.u32 %r1352, [%rd24];
ld.local.u32 %r1353, [%rd24+-4];
and.b32 %r118, %r109, 31;
setp.eq.s32 %p48, %r118, 0;
@%p48 bra BB6_80;
mov.u32 %r725, 32;
sub.s32 %r726, %r725, %r118;
shr.u32 %r727, %r1353, %r726;
shl.b32 %r728, %r1352, %r118;
add.s32 %r1352, %r727, %r728;
ld.local.u32 %r729, [%rd24+-8];
shr.u32 %r730, %r729, %r726;
shl.b32 %r731, %r1353, %r118;
add.s32 %r1353, %r730, %r731;
BB6_80:
shr.u32 %r732, %r1353, 30;
shl.b32 %r733, %r1352, 2;
add.s32 %r1354, %r732, %r733;
shl.b32 %r124, %r1353, 2;
shr.u32 %r734, %r1354, 31;
shr.u32 %r735, %r1352, 30;
add.s32 %r125, %r734, %r735;
setp.eq.s32 %p49, %r734, 0;
@%p49 bra BB6_81;
not.b32 %r736, %r1354;
neg.s32 %r1356, %r124;
setp.eq.s32 %p50, %r124, 0;
selp.u32 %r737, 1, 0, %p50;
add.s32 %r1354, %r737, %r736;
xor.b32 %r1355, %r115, -2147483648;
bra.uni BB6_83;
BB6_81:
mov.u32 %r1355, %r115;
mov.u32 %r1356, %r124;
BB6_83:
clz.b32 %r1358, %r1354;
setp.eq.s32 %p51, %r1358, 0;
shl.b32 %r738, %r1354, %r1358;
mov.u32 %r739, 32;
sub.s32 %r740, %r739, %r1358;
shr.u32 %r741, %r1356, %r740;
add.s32 %r742, %r741, %r738;
selp.b32 %r133, %r1354, %r742, %p51;
mov.u32 %r743, -921707870;
mul.hi.u32 %r1357, %r133, %r743;
setp.eq.s32 %p52, %r115, 0;
neg.s32 %r744, %r125;
selp.b32 %r1359, %r125, %r744, %p52;
setp.lt.s32 %p53, %r1357, 1;
@%p53 bra BB6_85;
mul.lo.s32 %r745, %r133, -921707870;
shr.u32 %r746, %r745, 31;
shl.b32 %r747, %r1357, 1;
add.s32 %r1357, %r746, %r747;
add.s32 %r1358, %r1358, 1;
BB6_85:
mov.u32 %r748, 126;
sub.s32 %r749, %r748, %r1358;
shl.b32 %r750, %r749, 23;
add.s32 %r751, %r1357, 1;
shr.u32 %r752, %r751, 7;
add.s32 %r753, %r752, 1;
shr.u32 %r754, %r753, 1;
add.s32 %r755, %r754, %r750;
or.b32 %r756, %r755, %r1355;
mov.b32 %f869, %r756;
BB6_86:
mul.rn.f32 %f68, %f869, %f869;
and.b32 %r141, %r1359, 1;
setp.eq.s32 %p54, %r141, 0;
@%p54 bra BB6_88;
mov.f32 %f411, 0fBAB6061A;
mov.f32 %f412, 0f37CCF5CE;
fma.rn.f32 %f870, %f412, %f68, %f411;
bra.uni BB6_89;
BB6_88:
mov.f32 %f413, 0f3C08839E;
mov.f32 %f414, 0fB94CA1F9;
fma.rn.f32 %f870, %f414, %f68, %f413;
BB6_89:
@%p54 bra BB6_91;
mov.f32 %f415, 0f3D2AAAA5;
fma.rn.f32 %f416, %f870, %f68, %f415;
mov.f32 %f417, 0fBF000000;
fma.rn.f32 %f871, %f416, %f68, %f417;
bra.uni BB6_92;
BB6_91:
mov.f32 %f418, 0fBE2AAAA3;
fma.rn.f32 %f419, %f870, %f68, %f418;
mov.f32 %f420, 0f00000000;
fma.rn.f32 %f871, %f419, %f68, %f420;
BB6_92:
fma.rn.f32 %f872, %f871, %f869, %f869;
@%p54 bra BB6_94;
mov.f32 %f421, 0f3F800000;
fma.rn.f32 %f872, %f871, %f68, %f421;
BB6_94:
and.b32 %r757, %r1359, 2;
setp.eq.s32 %p57, %r757, 0;
@%p57 bra BB6_96;
mov.f32 %f422, 0f00000000;
mov.f32 %f423, 0fBF800000;
fma.rn.f32 %f872, %f872, %f423, %f422;
BB6_96:
mul.f32 %f424, %f5, %f5;
mul.f32 %f425, %f424, 0f3F000000;
mul.f32 %f426, %f866, %f872;
div.rn.f32 %f874, %f426, %f425;
BB6_97:
setp.eq.f32 %p58, %f6, 0f00000000;
mov.f32 %f887, %f952;
@%p58 bra BB6_145;
add.u64 %rd25, %SPL, 0;
abs.f32 %f428, %f6;
setp.neu.f32 %p59, %f428, 0f7F800000;
mov.f32 %f875, %f6;
@%p59 bra BB6_100;
mov.f32 %f429, 0f00000000;
mul.rn.f32 %f875, %f6, %f429;
BB6_100:
mul.f32 %f430, %f875, 0f3F22F983;
cvt.rni.s32.f32 %r1369, %f430;
cvt.rn.f32.s32 %f431, %r1369;
neg.f32 %f432, %f431;
mov.f32 %f433, 0f3FC90FDA;
fma.rn.f32 %f434, %f432, %f433, %f875;
mov.f32 %f435, 0f33A22168;
fma.rn.f32 %f436, %f432, %f435, %f434;
mov.f32 %f437, 0f27C234C5;
fma.rn.f32 %f876, %f432, %f437, %f436;
abs.f32 %f438, %f875;
add.s64 %rd26, %rd25, 24;
setp.leu.f32 %p60, %f438, 0f47CE4780;
@%p60 bra BB6_111;
mov.b32 %r143, %f875;
shr.u32 %r144, %r143, 23;
shl.b32 %r760, %r143, 8;
or.b32 %r145, %r760, -2147483648;
mov.u32 %r1361, 0;
mov.u64 %rd155, __cudart_i2opi_f;
mov.u32 %r1360, -6;
mov.u64 %rd156, %rd25;
BB6_102:
.pragma "nounroll";
ld.const.u32 %r763, [%rd155];
// inline asm
{
mad.lo.cc.u32 %r761, %r763, %r145, %r1361;
madc.hi.u32 %r1361, %r763, %r145, 0;
}
// inline asm
st.local.u32 [%rd156], %r761;
add.s64 %rd156, %rd156, 4;
add.s64 %rd155, %rd155, 4;
add.s32 %r1360, %r1360, 1;
setp.ne.s32 %p61, %r1360, 0;
@%p61 bra BB6_102;
and.b32 %r766, %r144, 255;
add.s32 %r767, %r766, -128;
shr.u32 %r768, %r767, 5;
and.b32 %r150, %r143, -2147483648;
st.local.u32 [%rd26], %r1361;
mov.u32 %r769, 6;
sub.s32 %r770, %r769, %r768;
mul.wide.s32 %rd116, %r770, 4;
add.s64 %rd31, %rd25, %rd116;
ld.local.u32 %r1362, [%rd31];
ld.local.u32 %r1363, [%rd31+-4];
and.b32 %r153, %r144, 31;
setp.eq.s32 %p62, %r153, 0;
@%p62 bra BB6_105;
mov.u32 %r771, 32;
sub.s32 %r772, %r771, %r153;
shr.u32 %r773, %r1363, %r772;
shl.b32 %r774, %r1362, %r153;
add.s32 %r1362, %r773, %r774;
ld.local.u32 %r775, [%rd31+-8];
shr.u32 %r776, %r775, %r772;
shl.b32 %r777, %r1363, %r153;
add.s32 %r1363, %r776, %r777;
BB6_105:
shr.u32 %r778, %r1363, 30;
shl.b32 %r779, %r1362, 2;
add.s32 %r1364, %r778, %r779;
shl.b32 %r159, %r1363, 2;
shr.u32 %r780, %r1364, 31;
shr.u32 %r781, %r1362, 30;
add.s32 %r160, %r780, %r781;
setp.eq.s32 %p63, %r780, 0;
@%p63 bra BB6_106;
not.b32 %r782, %r1364;
neg.s32 %r1366, %r159;
setp.eq.s32 %p64, %r159, 0;
selp.u32 %r783, 1, 0, %p64;
add.s32 %r1364, %r783, %r782;
xor.b32 %r1365, %r150, -2147483648;
bra.uni BB6_108;
BB6_106:
mov.u32 %r1365, %r150;
mov.u32 %r1366, %r159;
BB6_108:
clz.b32 %r1368, %r1364;
setp.eq.s32 %p65, %r1368, 0;
shl.b32 %r784, %r1364, %r1368;
mov.u32 %r785, 32;
sub.s32 %r786, %r785, %r1368;
shr.u32 %r787, %r1366, %r786;
add.s32 %r788, %r787, %r784;
selp.b32 %r168, %r1364, %r788, %p65;
mov.u32 %r789, -921707870;
mul.hi.u32 %r1367, %r168, %r789;
setp.eq.s32 %p66, %r150, 0;
neg.s32 %r790, %r160;
selp.b32 %r1369, %r160, %r790, %p66;
setp.lt.s32 %p67, %r1367, 1;
@%p67 bra BB6_110;
mul.lo.s32 %r791, %r168, -921707870;
shr.u32 %r792, %r791, 31;
shl.b32 %r793, %r1367, 1;
add.s32 %r1367, %r792, %r793;
add.s32 %r1368, %r1368, 1;
BB6_110:
mov.u32 %r794, 126;
sub.s32 %r795, %r794, %r1368;
shl.b32 %r796, %r795, 23;
add.s32 %r797, %r1367, 1;
shr.u32 %r798, %r797, 7;
add.s32 %r799, %r798, 1;
shr.u32 %r800, %r799, 1;
add.s32 %r801, %r800, %r796;
or.b32 %r802, %r801, %r1365;
mov.b32 %f876, %r802;
BB6_111:
mul.rn.f32 %f87, %f876, %f876;
and.b32 %r176, %r1369, 1;
setp.eq.s32 %p68, %r176, 0;
@%p68 bra BB6_113;
mov.f32 %f439, 0fBAB6061A;
mov.f32 %f440, 0f37CCF5CE;
fma.rn.f32 %f877, %f440, %f87, %f439;
bra.uni BB6_114;
BB6_113:
mov.f32 %f441, 0f3C08839E;
mov.f32 %f442, 0fB94CA1F9;
fma.rn.f32 %f877, %f442, %f87, %f441;
BB6_114:
@%p68 bra BB6_116;
mov.f32 %f443, 0f3D2AAAA5;
fma.rn.f32 %f444, %f877, %f87, %f443;
mov.f32 %f445, 0fBF000000;
fma.rn.f32 %f878, %f444, %f87, %f445;
bra.uni BB6_117;
BB6_116:
mov.f32 %f446, 0fBE2AAAA3;
fma.rn.f32 %f447, %f877, %f87, %f446;
mov.f32 %f448, 0f00000000;
fma.rn.f32 %f878, %f447, %f87, %f448;
BB6_117:
fma.rn.f32 %f879, %f878, %f876, %f876;
@%p68 bra BB6_119;
mov.f32 %f449, 0f3F800000;
fma.rn.f32 %f879, %f878, %f87, %f449;
BB6_119:
and.b32 %r803, %r1369, 2;
setp.eq.s32 %p71, %r803, 0;
@%p71 bra BB6_121;
mov.f32 %f450, 0f00000000;
mov.f32 %f451, 0fBF800000;
fma.rn.f32 %f879, %f879, %f451, %f450;
BB6_121:
mul.f32 %f881, %f6, 0f3F000000;
abs.f32 %f452, %f881;
setp.neu.f32 %p72, %f452, 0f7F800000;
@%p72 bra BB6_123;
mov.f32 %f453, 0f00000000;
mul.rn.f32 %f881, %f881, %f453;
BB6_123:
mul.f32 %f454, %f881, 0f3F22F983;
cvt.rni.s32.f32 %r1379, %f454;
cvt.rn.f32.s32 %f455, %r1379;
neg.f32 %f456, %f455;
fma.rn.f32 %f458, %f456, %f433, %f881;
fma.rn.f32 %f460, %f456, %f435, %f458;
fma.rn.f32 %f882, %f456, %f437, %f460;
abs.f32 %f462, %f881;
setp.leu.f32 %p73, %f462, 0f47CE4780;
@%p73 bra BB6_134;
mov.b32 %r178, %f881;
shr.u32 %r179, %r178, 23;
shl.b32 %r806, %r178, 8;
or.b32 %r180, %r806, -2147483648;
mov.u32 %r1371, 0;
mov.u64 %rd157, __cudart_i2opi_f;
mov.u32 %r1370, -6;
mov.u64 %rd158, %rd25;
BB6_125:
.pragma "nounroll";
ld.const.u32 %r809, [%rd157];
// inline asm
{
mad.lo.cc.u32 %r807, %r809, %r180, %r1371;
madc.hi.u32 %r1371, %r809, %r180, 0;
}
// inline asm
st.local.u32 [%rd158], %r807;
add.s64 %rd158, %rd158, 4;
add.s64 %rd157, %rd157, 4;
add.s32 %r1370, %r1370, 1;
setp.ne.s32 %p74, %r1370, 0;
@%p74 bra BB6_125;
and.b32 %r812, %r179, 255;
add.s32 %r813, %r812, -128;
shr.u32 %r814, %r813, 5;
and.b32 %r185, %r178, -2147483648;
st.local.u32 [%rd26], %r1371;
mov.u32 %r815, 6;
sub.s32 %r816, %r815, %r814;
mul.wide.s32 %rd118, %r816, 4;
add.s64 %rd36, %rd25, %rd118;
ld.local.u32 %r1372, [%rd36];
ld.local.u32 %r1373, [%rd36+-4];
and.b32 %r188, %r179, 31;
setp.eq.s32 %p75, %r188, 0;
@%p75 bra BB6_128;
mov.u32 %r817, 32;
sub.s32 %r818, %r817, %r188;
shr.u32 %r819, %r1373, %r818;
shl.b32 %r820, %r1372, %r188;
add.s32 %r1372, %r819, %r820;
ld.local.u32 %r821, [%rd36+-8];
shr.u32 %r822, %r821, %r818;
shl.b32 %r823, %r1373, %r188;
add.s32 %r1373, %r822, %r823;
BB6_128:
shr.u32 %r824, %r1373, 30;
shl.b32 %r825, %r1372, 2;
add.s32 %r1374, %r824, %r825;
shl.b32 %r194, %r1373, 2;
shr.u32 %r826, %r1374, 31;
shr.u32 %r827, %r1372, 30;
add.s32 %r195, %r826, %r827;
setp.eq.s32 %p76, %r826, 0;
@%p76 bra BB6_129;
not.b32 %r828, %r1374;
neg.s32 %r1376, %r194;
setp.eq.s32 %p77, %r194, 0;
selp.u32 %r829, 1, 0, %p77;
add.s32 %r1374, %r829, %r828;
xor.b32 %r1375, %r185, -2147483648;
bra.uni BB6_131;
BB6_129:
mov.u32 %r1375, %r185;
mov.u32 %r1376, %r194;
BB6_131:
clz.b32 %r1378, %r1374;
setp.eq.s32 %p78, %r1378, 0;
shl.b32 %r830, %r1374, %r1378;
mov.u32 %r831, 32;
sub.s32 %r832, %r831, %r1378;
shr.u32 %r833, %r1376, %r832;
add.s32 %r834, %r833, %r830;
selp.b32 %r203, %r1374, %r834, %p78;
mov.u32 %r835, -921707870;
mul.hi.u32 %r1377, %r203, %r835;
setp.eq.s32 %p79, %r185, 0;
neg.s32 %r836, %r195;
selp.b32 %r1379, %r195, %r836, %p79;
setp.lt.s32 %p80, %r1377, 1;
@%p80 bra BB6_133;
mul.lo.s32 %r837, %r203, -921707870;
shr.u32 %r838, %r837, 31;
shl.b32 %r839, %r1377, 1;
add.s32 %r1377, %r838, %r839;
add.s32 %r1378, %r1378, 1;
BB6_133:
mov.u32 %r840, 126;
sub.s32 %r841, %r840, %r1378;
shl.b32 %r842, %r841, 23;
add.s32 %r843, %r1377, 1;
shr.u32 %r844, %r843, 7;
add.s32 %r845, %r844, 1;
shr.u32 %r846, %r845, 1;
add.s32 %r847, %r846, %r842;
or.b32 %r848, %r847, %r1375;
mov.b32 %f882, %r848;
BB6_134:
mul.rn.f32 %f105, %f882, %f882;
and.b32 %r211, %r1379, 1;
setp.eq.s32 %p81, %r211, 0;
@%p81 bra BB6_136;
mov.f32 %f463, 0fBAB6061A;
mov.f32 %f464, 0f37CCF5CE;
fma.rn.f32 %f883, %f464, %f105, %f463;
bra.uni BB6_137;
BB6_136:
mov.f32 %f465, 0f3C08839E;
mov.f32 %f466, 0fB94CA1F9;
fma.rn.f32 %f883, %f466, %f105, %f465;
BB6_137:
@%p81 bra BB6_139;
mov.f32 %f467, 0f3D2AAAA5;
fma.rn.f32 %f468, %f883, %f105, %f467;
mov.f32 %f469, 0fBF000000;
fma.rn.f32 %f884, %f468, %f105, %f469;
bra.uni BB6_140;
BB6_139:
mov.f32 %f470, 0fBE2AAAA3;
fma.rn.f32 %f471, %f883, %f105, %f470;
mov.f32 %f472, 0f00000000;
fma.rn.f32 %f884, %f471, %f105, %f472;
BB6_140:
fma.rn.f32 %f885, %f884, %f882, %f882;
@%p81 bra BB6_142;
mov.f32 %f473, 0f3F800000;
fma.rn.f32 %f885, %f884, %f105, %f473;
BB6_142:
and.b32 %r849, %r1379, 2;
setp.eq.s32 %p84, %r849, 0;
@%p84 bra BB6_144;
mov.f32 %f474, 0f00000000;
mov.f32 %f475, 0fBF800000;
fma.rn.f32 %f885, %f885, %f475, %f474;
BB6_144:
mul.f32 %f476, %f6, %f6;
mul.f32 %f477, %f476, 0f3F000000;
mul.f32 %f478, %f879, %f885;
div.rn.f32 %f887, %f478, %f477;
BB6_145:
setp.eq.f32 %p85, %f7, 0f00000000;
mov.f32 %f900, %f952;
@%p85 bra BB6_193;
add.u64 %rd37, %SPL, 0;
abs.f32 %f480, %f7;
setp.neu.f32 %p86, %f480, 0f7F800000;
mov.f32 %f888, %f7;
@%p86 bra BB6_148;
mov.f32 %f481, 0f00000000;
mul.rn.f32 %f888, %f7, %f481;
BB6_148:
mul.f32 %f482, %f888, 0f3F22F983;
cvt.rni.s32.f32 %r1389, %f482;
cvt.rn.f32.s32 %f483, %r1389;
neg.f32 %f484, %f483;
mov.f32 %f485, 0f3FC90FDA;
fma.rn.f32 %f486, %f484, %f485, %f888;
mov.f32 %f487, 0f33A22168;
fma.rn.f32 %f488, %f484, %f487, %f486;
mov.f32 %f489, 0f27C234C5;
fma.rn.f32 %f889, %f484, %f489, %f488;
abs.f32 %f490, %f888;
add.s64 %rd38, %rd37, 24;
setp.leu.f32 %p87, %f490, 0f47CE4780;
@%p87 bra BB6_159;
mov.b32 %r213, %f888;
shr.u32 %r214, %r213, 23;
shl.b32 %r852, %r213, 8;
or.b32 %r215, %r852, -2147483648;
mov.u32 %r1381, 0;
mov.u64 %rd159, __cudart_i2opi_f;
mov.u32 %r1380, -6;
mov.u64 %rd160, %rd37;
BB6_150:
.pragma "nounroll";
ld.const.u32 %r855, [%rd159];
// inline asm
{
mad.lo.cc.u32 %r853, %r855, %r215, %r1381;
madc.hi.u32 %r1381, %r855, %r215, 0;
}
// inline asm
st.local.u32 [%rd160], %r853;
add.s64 %rd160, %rd160, 4;
add.s64 %rd159, %rd159, 4;
add.s32 %r1380, %r1380, 1;
setp.ne.s32 %p88, %r1380, 0;
@%p88 bra BB6_150;
and.b32 %r858, %r214, 255;
add.s32 %r859, %r858, -128;
shr.u32 %r860, %r859, 5;
and.b32 %r220, %r213, -2147483648;
st.local.u32 [%rd38], %r1381;
mov.u32 %r861, 6;
sub.s32 %r862, %r861, %r860;
mul.wide.s32 %rd121, %r862, 4;
add.s64 %rd43, %rd37, %rd121;
ld.local.u32 %r1382, [%rd43];
ld.local.u32 %r1383, [%rd43+-4];
and.b32 %r223, %r214, 31;
setp.eq.s32 %p89, %r223, 0;
@%p89 bra BB6_153;
mov.u32 %r863, 32;
sub.s32 %r864, %r863, %r223;
shr.u32 %r865, %r1383, %r864;
shl.b32 %r866, %r1382, %r223;
add.s32 %r1382, %r865, %r866;
ld.local.u32 %r867, [%rd43+-8];
shr.u32 %r868, %r867, %r864;
shl.b32 %r869, %r1383, %r223;
add.s32 %r1383, %r868, %r869;
BB6_153:
shr.u32 %r870, %r1383, 30;
shl.b32 %r871, %r1382, 2;
add.s32 %r1384, %r870, %r871;
shl.b32 %r229, %r1383, 2;
shr.u32 %r872, %r1384, 31;
shr.u32 %r873, %r1382, 30;
add.s32 %r230, %r872, %r873;
setp.eq.s32 %p90, %r872, 0;
@%p90 bra BB6_154;
not.b32 %r874, %r1384;
neg.s32 %r1386, %r229;
setp.eq.s32 %p91, %r229, 0;
selp.u32 %r875, 1, 0, %p91;
add.s32 %r1384, %r875, %r874;
xor.b32 %r1385, %r220, -2147483648;
bra.uni BB6_156;
BB6_154:
mov.u32 %r1385, %r220;
mov.u32 %r1386, %r229;
BB6_156:
clz.b32 %r1388, %r1384;
setp.eq.s32 %p92, %r1388, 0;
shl.b32 %r876, %r1384, %r1388;
mov.u32 %r877, 32;
sub.s32 %r878, %r877, %r1388;
shr.u32 %r879, %r1386, %r878;
add.s32 %r880, %r879, %r876;
selp.b32 %r238, %r1384, %r880, %p92;
mov.u32 %r881, -921707870;
mul.hi.u32 %r1387, %r238, %r881;
setp.eq.s32 %p93, %r220, 0;
neg.s32 %r882, %r230;
selp.b32 %r1389, %r230, %r882, %p93;
setp.lt.s32 %p94, %r1387, 1;
@%p94 bra BB6_158;
mul.lo.s32 %r883, %r238, -921707870;
shr.u32 %r884, %r883, 31;
shl.b32 %r885, %r1387, 1;
add.s32 %r1387, %r884, %r885;
add.s32 %r1388, %r1388, 1;
BB6_158:
mov.u32 %r886, 126;
sub.s32 %r887, %r886, %r1388;
shl.b32 %r888, %r887, 23;
add.s32 %r889, %r1387, 1;
shr.u32 %r890, %r889, 7;
add.s32 %r891, %r890, 1;
shr.u32 %r892, %r891, 1;
add.s32 %r893, %r892, %r888;
or.b32 %r894, %r893, %r1385;
mov.b32 %f889, %r894;
BB6_159:
mul.rn.f32 %f124, %f889, %f889;
and.b32 %r246, %r1389, 1;
setp.eq.s32 %p95, %r246, 0;
@%p95 bra BB6_161;
mov.f32 %f491, 0fBAB6061A;
mov.f32 %f492, 0f37CCF5CE;
fma.rn.f32 %f890, %f492, %f124, %f491;
bra.uni BB6_162;
BB6_161:
mov.f32 %f493, 0f3C08839E;
mov.f32 %f494, 0fB94CA1F9;
fma.rn.f32 %f890, %f494, %f124, %f493;
BB6_162:
@%p95 bra BB6_164;
mov.f32 %f495, 0f3D2AAAA5;
fma.rn.f32 %f496, %f890, %f124, %f495;
mov.f32 %f497, 0fBF000000;
fma.rn.f32 %f891, %f496, %f124, %f497;
bra.uni BB6_165;
BB6_164:
mov.f32 %f498, 0fBE2AAAA3;
fma.rn.f32 %f499, %f890, %f124, %f498;
mov.f32 %f500, 0f00000000;
fma.rn.f32 %f891, %f499, %f124, %f500;
BB6_165:
fma.rn.f32 %f892, %f891, %f889, %f889;
@%p95 bra BB6_167;
mov.f32 %f501, 0f3F800000;
fma.rn.f32 %f892, %f891, %f124, %f501;
BB6_167:
and.b32 %r895, %r1389, 2;
setp.eq.s32 %p98, %r895, 0;
@%p98 bra BB6_169;
mov.f32 %f502, 0f00000000;
mov.f32 %f503, 0fBF800000;
fma.rn.f32 %f892, %f892, %f503, %f502;
BB6_169:
mul.f32 %f894, %f7, 0f3F000000;
abs.f32 %f504, %f894;
setp.neu.f32 %p99, %f504, 0f7F800000;
@%p99 bra BB6_171;
mov.f32 %f505, 0f00000000;
mul.rn.f32 %f894, %f894, %f505;
BB6_171:
mul.f32 %f506, %f894, 0f3F22F983;
cvt.rni.s32.f32 %r1399, %f506;
cvt.rn.f32.s32 %f507, %r1399;
neg.f32 %f508, %f507;
fma.rn.f32 %f510, %f508, %f485, %f894;
fma.rn.f32 %f512, %f508, %f487, %f510;
fma.rn.f32 %f895, %f508, %f489, %f512;
abs.f32 %f514, %f894;
setp.leu.f32 %p100, %f514, 0f47CE4780;
@%p100 bra BB6_182;
mov.b32 %r248, %f894;
shr.u32 %r249, %r248, 23;
shl.b32 %r898, %r248, 8;
or.b32 %r250, %r898, -2147483648;
mov.u32 %r1391, 0;
mov.u64 %rd161, __cudart_i2opi_f;
mov.u32 %r1390, -6;
mov.u64 %rd162, %rd37;
BB6_173:
.pragma "nounroll";
ld.const.u32 %r901, [%rd161];
// inline asm
{
mad.lo.cc.u32 %r899, %r901, %r250, %r1391;
madc.hi.u32 %r1391, %r901, %r250, 0;
}
// inline asm
st.local.u32 [%rd162], %r899;
add.s64 %rd162, %rd162, 4;
add.s64 %rd161, %rd161, 4;
add.s32 %r1390, %r1390, 1;
setp.ne.s32 %p101, %r1390, 0;
@%p101 bra BB6_173;
and.b32 %r904, %r249, 255;
add.s32 %r905, %r904, -128;
shr.u32 %r906, %r905, 5;
and.b32 %r255, %r248, -2147483648;
st.local.u32 [%rd38], %r1391;
mov.u32 %r907, 6;
sub.s32 %r908, %r907, %r906;
mul.wide.s32 %rd123, %r908, 4;
add.s64 %rd48, %rd37, %rd123;
ld.local.u32 %r1392, [%rd48];
ld.local.u32 %r1393, [%rd48+-4];
and.b32 %r258, %r249, 31;
setp.eq.s32 %p102, %r258, 0;
@%p102 bra BB6_176;
mov.u32 %r909, 32;
sub.s32 %r910, %r909, %r258;
shr.u32 %r911, %r1393, %r910;
shl.b32 %r912, %r1392, %r258;
add.s32 %r1392, %r911, %r912;
ld.local.u32 %r913, [%rd48+-8];
shr.u32 %r914, %r913, %r910;
shl.b32 %r915, %r1393, %r258;
add.s32 %r1393, %r914, %r915;
BB6_176:
shr.u32 %r916, %r1393, 30;
shl.b32 %r917, %r1392, 2;
add.s32 %r1394, %r916, %r917;
shl.b32 %r264, %r1393, 2;
shr.u32 %r918, %r1394, 31;
shr.u32 %r919, %r1392, 30;
add.s32 %r265, %r918, %r919;
setp.eq.s32 %p103, %r918, 0;
@%p103 bra BB6_177;
not.b32 %r920, %r1394;
neg.s32 %r1396, %r264;
setp.eq.s32 %p104, %r264, 0;
selp.u32 %r921, 1, 0, %p104;
add.s32 %r1394, %r921, %r920;
xor.b32 %r1395, %r255, -2147483648;
bra.uni BB6_179;
BB6_177:
mov.u32 %r1395, %r255;
mov.u32 %r1396, %r264;
BB6_179:
clz.b32 %r1398, %r1394;
setp.eq.s32 %p105, %r1398, 0;
shl.b32 %r922, %r1394, %r1398;
mov.u32 %r923, 32;
sub.s32 %r924, %r923, %r1398;
shr.u32 %r925, %r1396, %r924;
add.s32 %r926, %r925, %r922;
selp.b32 %r273, %r1394, %r926, %p105;
mov.u32 %r927, -921707870;
mul.hi.u32 %r1397, %r273, %r927;
setp.eq.s32 %p106, %r255, 0;
neg.s32 %r928, %r265;
selp.b32 %r1399, %r265, %r928, %p106;
setp.lt.s32 %p107, %r1397, 1;
@%p107 bra BB6_181;
mul.lo.s32 %r929, %r273, -921707870;
shr.u32 %r930, %r929, 31;
shl.b32 %r931, %r1397, 1;
add.s32 %r1397, %r930, %r931;
add.s32 %r1398, %r1398, 1;
BB6_181:
mov.u32 %r932, 126;
sub.s32 %r933, %r932, %r1398;
shl.b32 %r934, %r933, 23;
add.s32 %r935, %r1397, 1;
shr.u32 %r936, %r935, 7;
add.s32 %r937, %r936, 1;
shr.u32 %r938, %r937, 1;
add.s32 %r939, %r938, %r934;
or.b32 %r940, %r939, %r1395;
mov.b32 %f895, %r940;
BB6_182:
mul.rn.f32 %f142, %f895, %f895;
and.b32 %r281, %r1399, 1;
setp.eq.s32 %p108, %r281, 0;
@%p108 bra BB6_184;
mov.f32 %f515, 0fBAB6061A;
mov.f32 %f516, 0f37CCF5CE;
fma.rn.f32 %f896, %f516, %f142, %f515;
bra.uni BB6_185;
BB6_184:
mov.f32 %f517, 0f3C08839E;
mov.f32 %f518, 0fB94CA1F9;
fma.rn.f32 %f896, %f518, %f142, %f517;
BB6_185:
@%p108 bra BB6_187;
mov.f32 %f519, 0f3D2AAAA5;
fma.rn.f32 %f520, %f896, %f142, %f519;
mov.f32 %f521, 0fBF000000;
fma.rn.f32 %f897, %f520, %f142, %f521;
bra.uni BB6_188;
BB6_187:
mov.f32 %f522, 0fBE2AAAA3;
fma.rn.f32 %f523, %f896, %f142, %f522;
mov.f32 %f524, 0f00000000;
fma.rn.f32 %f897, %f523, %f142, %f524;
BB6_188:
fma.rn.f32 %f898, %f897, %f895, %f895;
@%p108 bra BB6_190;
mov.f32 %f525, 0f3F800000;
fma.rn.f32 %f898, %f897, %f142, %f525;
BB6_190:
and.b32 %r941, %r1399, 2;
setp.eq.s32 %p111, %r941, 0;
@%p111 bra BB6_192;
mov.f32 %f526, 0f00000000;
mov.f32 %f527, 0fBF800000;
fma.rn.f32 %f898, %f898, %f527, %f526;
BB6_192:
mul.f32 %f528, %f7, %f7;
mul.f32 %f529, %f528, 0f3F000000;
mul.f32 %f530, %f892, %f898;
div.rn.f32 %f900, %f530, %f529;
BB6_193:
sub.f32 %f532, %f1, %f3;
add.f32 %f533, %f532, 0f3F800000;
mul.f32 %f156, %f533, 0f40490FDB;
mul.f32 %f157, %f532, 0f40490FDB;
add.f32 %f534, %f532, 0fBF800000;
mul.f32 %f158, %f534, 0f40490FDB;
add.f32 %f535, %f532, 0fC0000000;
mul.f32 %f159, %f535, 0f40490FDB;
setp.eq.f32 %p112, %f156, 0f00000000;
mov.f32 %f913, %f952;
@%p112 bra BB6_241;
add.u64 %rd49, %SPL, 0;
abs.f32 %f536, %f156;
setp.neu.f32 %p113, %f536, 0f7F800000;
mov.f32 %f901, %f156;
@%p113 bra BB6_196;
mov.f32 %f537, 0f00000000;
mul.rn.f32 %f901, %f156, %f537;
BB6_196:
mul.f32 %f538, %f901, 0f3F22F983;
cvt.rni.s32.f32 %r1409, %f538;
cvt.rn.f32.s32 %f539, %r1409;
neg.f32 %f540, %f539;
mov.f32 %f541, 0f3FC90FDA;
fma.rn.f32 %f542, %f540, %f541, %f901;
mov.f32 %f543, 0f33A22168;
fma.rn.f32 %f544, %f540, %f543, %f542;
mov.f32 %f545, 0f27C234C5;
fma.rn.f32 %f902, %f540, %f545, %f544;
abs.f32 %f546, %f901;
add.s64 %rd50, %rd49, 24;
setp.leu.f32 %p114, %f546, 0f47CE4780;
@%p114 bra BB6_207;
mov.b32 %r283, %f901;
shr.u32 %r284, %r283, 23;
shl.b32 %r944, %r283, 8;
or.b32 %r285, %r944, -2147483648;
mov.u32 %r1401, 0;
mov.u64 %rd163, __cudart_i2opi_f;
mov.u32 %r1400, -6;
mov.u64 %rd164, %rd49;
BB6_198:
.pragma "nounroll";
ld.const.u32 %r947, [%rd163];
// inline asm
{
mad.lo.cc.u32 %r945, %r947, %r285, %r1401;
madc.hi.u32 %r1401, %r947, %r285, 0;
}
// inline asm
st.local.u32 [%rd164], %r945;
add.s64 %rd164, %rd164, 4;
add.s64 %rd163, %rd163, 4;
add.s32 %r1400, %r1400, 1;
setp.ne.s32 %p115, %r1400, 0;
@%p115 bra BB6_198;
and.b32 %r950, %r284, 255;
add.s32 %r951, %r950, -128;
shr.u32 %r952, %r951, 5;
and.b32 %r290, %r283, -2147483648;
st.local.u32 [%rd50], %r1401;
mov.u32 %r953, 6;
sub.s32 %r954, %r953, %r952;
mul.wide.s32 %rd126, %r954, 4;
add.s64 %rd55, %rd49, %rd126;
ld.local.u32 %r1402, [%rd55];
ld.local.u32 %r1403, [%rd55+-4];
and.b32 %r293, %r284, 31;
setp.eq.s32 %p116, %r293, 0;
@%p116 bra BB6_201;
mov.u32 %r955, 32;
sub.s32 %r956, %r955, %r293;
shr.u32 %r957, %r1403, %r956;
shl.b32 %r958, %r1402, %r293;
add.s32 %r1402, %r957, %r958;
ld.local.u32 %r959, [%rd55+-8];
shr.u32 %r960, %r959, %r956;
shl.b32 %r961, %r1403, %r293;
add.s32 %r1403, %r960, %r961;
BB6_201:
shr.u32 %r962, %r1403, 30;
shl.b32 %r963, %r1402, 2;
add.s32 %r1404, %r962, %r963;
shl.b32 %r299, %r1403, 2;
shr.u32 %r964, %r1404, 31;
shr.u32 %r965, %r1402, 30;
add.s32 %r300, %r964, %r965;
setp.eq.s32 %p117, %r964, 0;
@%p117 bra BB6_202;
not.b32 %r966, %r1404;
neg.s32 %r1406, %r299;
setp.eq.s32 %p118, %r299, 0;
selp.u32 %r967, 1, 0, %p118;
add.s32 %r1404, %r967, %r966;
xor.b32 %r1405, %r290, -2147483648;
bra.uni BB6_204;
BB6_202:
mov.u32 %r1405, %r290;
mov.u32 %r1406, %r299;
BB6_204:
clz.b32 %r1408, %r1404;
setp.eq.s32 %p119, %r1408, 0;
shl.b32 %r968, %r1404, %r1408;
mov.u32 %r969, 32;
sub.s32 %r970, %r969, %r1408;
shr.u32 %r971, %r1406, %r970;
add.s32 %r972, %r971, %r968;
selp.b32 %r308, %r1404, %r972, %p119;
mov.u32 %r973, -921707870;
mul.hi.u32 %r1407, %r308, %r973;
setp.eq.s32 %p120, %r290, 0;
neg.s32 %r974, %r300;
selp.b32 %r1409, %r300, %r974, %p120;
setp.lt.s32 %p121, %r1407, 1;
@%p121 bra BB6_206;
mul.lo.s32 %r975, %r308, -921707870;
shr.u32 %r976, %r975, 31;
shl.b32 %r977, %r1407, 1;
add.s32 %r1407, %r976, %r977;
add.s32 %r1408, %r1408, 1;
BB6_206:
mov.u32 %r978, 126;
sub.s32 %r979, %r978, %r1408;
shl.b32 %r980, %r979, 23;
add.s32 %r981, %r1407, 1;
shr.u32 %r982, %r981, 7;
add.s32 %r983, %r982, 1;
shr.u32 %r984, %r983, 1;
add.s32 %r985, %r984, %r980;
or.b32 %r986, %r985, %r1405;
mov.b32 %f902, %r986;
BB6_207:
mul.rn.f32 %f165, %f902, %f902;
and.b32 %r316, %r1409, 1;
setp.eq.s32 %p122, %r316, 0;
@%p122 bra BB6_209;
mov.f32 %f547, 0fBAB6061A;
mov.f32 %f548, 0f37CCF5CE;
fma.rn.f32 %f903, %f548, %f165, %f547;
bra.uni BB6_210;
BB6_209:
mov.f32 %f549, 0f3C08839E;
mov.f32 %f550, 0fB94CA1F9;
fma.rn.f32 %f903, %f550, %f165, %f549;
BB6_210:
@%p122 bra BB6_212;
mov.f32 %f551, 0f3D2AAAA5;
fma.rn.f32 %f552, %f903, %f165, %f551;
mov.f32 %f553, 0fBF000000;
fma.rn.f32 %f904, %f552, %f165, %f553;
bra.uni BB6_213;
BB6_212:
mov.f32 %f554, 0fBE2AAAA3;
fma.rn.f32 %f555, %f903, %f165, %f554;
mov.f32 %f556, 0f00000000;
fma.rn.f32 %f904, %f555, %f165, %f556;
BB6_213:
fma.rn.f32 %f905, %f904, %f902, %f902;
@%p122 bra BB6_215;
mov.f32 %f557, 0f3F800000;
fma.rn.f32 %f905, %f904, %f165, %f557;
BB6_215:
and.b32 %r987, %r1409, 2;
setp.eq.s32 %p125, %r987, 0;
@%p125 bra BB6_217;
mov.f32 %f558, 0f00000000;
mov.f32 %f559, 0fBF800000;
fma.rn.f32 %f905, %f905, %f559, %f558;
BB6_217:
mul.f32 %f907, %f156, 0f3F000000;
abs.f32 %f560, %f907;
setp.neu.f32 %p126, %f560, 0f7F800000;
@%p126 bra BB6_219;
mov.f32 %f561, 0f00000000;
mul.rn.f32 %f907, %f907, %f561;
BB6_219:
mul.f32 %f562, %f907, 0f3F22F983;
cvt.rni.s32.f32 %r1419, %f562;
cvt.rn.f32.s32 %f563, %r1419;
neg.f32 %f564, %f563;
fma.rn.f32 %f566, %f564, %f541, %f907;
fma.rn.f32 %f568, %f564, %f543, %f566;
fma.rn.f32 %f908, %f564, %f545, %f568;
abs.f32 %f570, %f907;
setp.leu.f32 %p127, %f570, 0f47CE4780;
@%p127 bra BB6_230;
mov.b32 %r318, %f907;
shr.u32 %r319, %r318, 23;
shl.b32 %r990, %r318, 8;
or.b32 %r320, %r990, -2147483648;
mov.u32 %r1411, 0;
mov.u64 %rd165, __cudart_i2opi_f;
mov.u32 %r1410, -6;
mov.u64 %rd166, %rd49;
BB6_221:
.pragma "nounroll";
ld.const.u32 %r993, [%rd165];
// inline asm
{
mad.lo.cc.u32 %r991, %r993, %r320, %r1411;
madc.hi.u32 %r1411, %r993, %r320, 0;
}
// inline asm
st.local.u32 [%rd166], %r991;
add.s64 %rd166, %rd166, 4;
add.s64 %rd165, %rd165, 4;
add.s32 %r1410, %r1410, 1;
setp.ne.s32 %p128, %r1410, 0;
@%p128 bra BB6_221;
and.b32 %r996, %r319, 255;
add.s32 %r997, %r996, -128;
shr.u32 %r998, %r997, 5;
and.b32 %r325, %r318, -2147483648;
st.local.u32 [%rd50], %r1411;
mov.u32 %r999, 6;
sub.s32 %r1000, %r999, %r998;
mul.wide.s32 %rd128, %r1000, 4;
add.s64 %rd60, %rd49, %rd128;
ld.local.u32 %r1412, [%rd60];
ld.local.u32 %r1413, [%rd60+-4];
and.b32 %r328, %r319, 31;
setp.eq.s32 %p129, %r328, 0;
@%p129 bra BB6_224;
mov.u32 %r1001, 32;
sub.s32 %r1002, %r1001, %r328;
shr.u32 %r1003, %r1413, %r1002;
shl.b32 %r1004, %r1412, %r328;
add.s32 %r1412, %r1003, %r1004;
ld.local.u32 %r1005, [%rd60+-8];
shr.u32 %r1006, %r1005, %r1002;
shl.b32 %r1007, %r1413, %r328;
add.s32 %r1413, %r1006, %r1007;
BB6_224:
shr.u32 %r1008, %r1413, 30;
shl.b32 %r1009, %r1412, 2;
add.s32 %r1414, %r1008, %r1009;
shl.b32 %r334, %r1413, 2;
shr.u32 %r1010, %r1414, 31;
shr.u32 %r1011, %r1412, 30;
add.s32 %r335, %r1010, %r1011;
setp.eq.s32 %p130, %r1010, 0;
@%p130 bra BB6_225;
not.b32 %r1012, %r1414;
neg.s32 %r1416, %r334;
setp.eq.s32 %p131, %r334, 0;
selp.u32 %r1013, 1, 0, %p131;
add.s32 %r1414, %r1013, %r1012;
xor.b32 %r1415, %r325, -2147483648;
bra.uni BB6_227;
BB6_225:
mov.u32 %r1415, %r325;
mov.u32 %r1416, %r334;
BB6_227:
clz.b32 %r1418, %r1414;
setp.eq.s32 %p132, %r1418, 0;
shl.b32 %r1014, %r1414, %r1418;
mov.u32 %r1015, 32;
sub.s32 %r1016, %r1015, %r1418;
shr.u32 %r1017, %r1416, %r1016;
add.s32 %r1018, %r1017, %r1014;
selp.b32 %r343, %r1414, %r1018, %p132;
mov.u32 %r1019, -921707870;
mul.hi.u32 %r1417, %r343, %r1019;
setp.eq.s32 %p133, %r325, 0;
neg.s32 %r1020, %r335;
selp.b32 %r1419, %r335, %r1020, %p133;
setp.lt.s32 %p134, %r1417, 1;
@%p134 bra BB6_229;
mul.lo.s32 %r1021, %r343, -921707870;
shr.u32 %r1022, %r1021, 31;
shl.b32 %r1023, %r1417, 1;
add.s32 %r1417, %r1022, %r1023;
add.s32 %r1418, %r1418, 1;
BB6_229:
mov.u32 %r1024, 126;
sub.s32 %r1025, %r1024, %r1418;
shl.b32 %r1026, %r1025, 23;
add.s32 %r1027, %r1417, 1;
shr.u32 %r1028, %r1027, 7;
add.s32 %r1029, %r1028, 1;
shr.u32 %r1030, %r1029, 1;
add.s32 %r1031, %r1030, %r1026;
or.b32 %r1032, %r1031, %r1415;
mov.b32 %f908, %r1032;
BB6_230:
mul.rn.f32 %f183, %f908, %f908;
and.b32 %r351, %r1419, 1;
setp.eq.s32 %p135, %r351, 0;
@%p135 bra BB6_232;
mov.f32 %f571, 0fBAB6061A;
mov.f32 %f572, 0f37CCF5CE;
fma.rn.f32 %f909, %f572, %f183, %f571;
bra.uni BB6_233;
BB6_232:
mov.f32 %f573, 0f3C08839E;
mov.f32 %f574, 0fB94CA1F9;
fma.rn.f32 %f909, %f574, %f183, %f573;
BB6_233:
@%p135 bra BB6_235;
mov.f32 %f575, 0f3D2AAAA5;
fma.rn.f32 %f576, %f909, %f183, %f575;
mov.f32 %f577, 0fBF000000;
fma.rn.f32 %f910, %f576, %f183, %f577;
bra.uni BB6_236;
BB6_235:
mov.f32 %f578, 0fBE2AAAA3;
fma.rn.f32 %f579, %f909, %f183, %f578;
mov.f32 %f580, 0f00000000;
fma.rn.f32 %f910, %f579, %f183, %f580;
BB6_236:
fma.rn.f32 %f911, %f910, %f908, %f908;
@%p135 bra BB6_238;
mov.f32 %f581, 0f3F800000;
fma.rn.f32 %f911, %f910, %f183, %f581;
BB6_238:
and.b32 %r1033, %r1419, 2;
setp.eq.s32 %p138, %r1033, 0;
@%p138 bra BB6_240;
mov.f32 %f582, 0f00000000;
mov.f32 %f583, 0fBF800000;
fma.rn.f32 %f911, %f911, %f583, %f582;
BB6_240:
mul.f32 %f584, %f156, %f156;
mul.f32 %f585, %f584, 0f3F000000;
mul.f32 %f586, %f905, %f911;
div.rn.f32 %f913, %f586, %f585;
BB6_241:
setp.eq.f32 %p139, %f157, 0f00000000;
mov.f32 %f926, %f952;
@%p139 bra BB6_289;
add.u64 %rd61, %SPL, 0;
abs.f32 %f588, %f157;
setp.neu.f32 %p140, %f588, 0f7F800000;
mov.f32 %f914, %f157;
@%p140 bra BB6_244;
mov.f32 %f589, 0f00000000;
mul.rn.f32 %f914, %f157, %f589;
BB6_244:
mul.f32 %f590, %f914, 0f3F22F983;
cvt.rni.s32.f32 %r1429, %f590;
cvt.rn.f32.s32 %f591, %r1429;
neg.f32 %f592, %f591;
mov.f32 %f593, 0f3FC90FDA;
fma.rn.f32 %f594, %f592, %f593, %f914;
mov.f32 %f595, 0f33A22168;
fma.rn.f32 %f596, %f592, %f595, %f594;
mov.f32 %f597, 0f27C234C5;
fma.rn.f32 %f915, %f592, %f597, %f596;
abs.f32 %f598, %f914;
add.s64 %rd62, %rd61, 24;
setp.leu.f32 %p141, %f598, 0f47CE4780;
@%p141 bra BB6_255;
mov.b32 %r353, %f914;
shr.u32 %r354, %r353, 23;
shl.b32 %r1036, %r353, 8;
or.b32 %r355, %r1036, -2147483648;
mov.u32 %r1421, 0;
mov.u64 %rd167, __cudart_i2opi_f;
mov.u32 %r1420, -6;
mov.u64 %rd168, %rd61;
BB6_246:
.pragma "nounroll";
ld.const.u32 %r1039, [%rd167];
// inline asm
{
mad.lo.cc.u32 %r1037, %r1039, %r355, %r1421;
madc.hi.u32 %r1421, %r1039, %r355, 0;
}
// inline asm
st.local.u32 [%rd168], %r1037;
add.s64 %rd168, %rd168, 4;
add.s64 %rd167, %rd167, 4;
add.s32 %r1420, %r1420, 1;
setp.ne.s32 %p142, %r1420, 0;
@%p142 bra BB6_246;
and.b32 %r1042, %r354, 255;
add.s32 %r1043, %r1042, -128;
shr.u32 %r1044, %r1043, 5;
and.b32 %r360, %r353, -2147483648;
st.local.u32 [%rd62], %r1421;
mov.u32 %r1045, 6;
sub.s32 %r1046, %r1045, %r1044;
mul.wide.s32 %rd131, %r1046, 4;
add.s64 %rd67, %rd61, %rd131;
ld.local.u32 %r1422, [%rd67];
ld.local.u32 %r1423, [%rd67+-4];
and.b32 %r363, %r354, 31;
setp.eq.s32 %p143, %r363, 0;
@%p143 bra BB6_249;
mov.u32 %r1047, 32;
sub.s32 %r1048, %r1047, %r363;
shr.u32 %r1049, %r1423, %r1048;
shl.b32 %r1050, %r1422, %r363;
add.s32 %r1422, %r1049, %r1050;
ld.local.u32 %r1051, [%rd67+-8];
shr.u32 %r1052, %r1051, %r1048;
shl.b32 %r1053, %r1423, %r363;
add.s32 %r1423, %r1052, %r1053;
BB6_249:
shr.u32 %r1054, %r1423, 30;
shl.b32 %r1055, %r1422, 2;
add.s32 %r1424, %r1054, %r1055;
shl.b32 %r369, %r1423, 2;
shr.u32 %r1056, %r1424, 31;
shr.u32 %r1057, %r1422, 30;
add.s32 %r370, %r1056, %r1057;
setp.eq.s32 %p144, %r1056, 0;
@%p144 bra BB6_250;
not.b32 %r1058, %r1424;
neg.s32 %r1426, %r369;
setp.eq.s32 %p145, %r369, 0;
selp.u32 %r1059, 1, 0, %p145;
add.s32 %r1424, %r1059, %r1058;
xor.b32 %r1425, %r360, -2147483648;
bra.uni BB6_252;
BB6_250:
mov.u32 %r1425, %r360;
mov.u32 %r1426, %r369;
BB6_252:
clz.b32 %r1428, %r1424;
setp.eq.s32 %p146, %r1428, 0;
shl.b32 %r1060, %r1424, %r1428;
mov.u32 %r1061, 32;
sub.s32 %r1062, %r1061, %r1428;
shr.u32 %r1063, %r1426, %r1062;
add.s32 %r1064, %r1063, %r1060;
selp.b32 %r378, %r1424, %r1064, %p146;
mov.u32 %r1065, -921707870;
mul.hi.u32 %r1427, %r378, %r1065;
setp.eq.s32 %p147, %r360, 0;
neg.s32 %r1066, %r370;
selp.b32 %r1429, %r370, %r1066, %p147;
setp.lt.s32 %p148, %r1427, 1;
@%p148 bra BB6_254;
mul.lo.s32 %r1067, %r378, -921707870;
shr.u32 %r1068, %r1067, 31;
shl.b32 %r1069, %r1427, 1;
add.s32 %r1427, %r1068, %r1069;
add.s32 %r1428, %r1428, 1;
BB6_254:
mov.u32 %r1070, 126;
sub.s32 %r1071, %r1070, %r1428;
shl.b32 %r1072, %r1071, 23;
add.s32 %r1073, %r1427, 1;
shr.u32 %r1074, %r1073, 7;
add.s32 %r1075, %r1074, 1;
shr.u32 %r1076, %r1075, 1;
add.s32 %r1077, %r1076, %r1072;
or.b32 %r1078, %r1077, %r1425;
mov.b32 %f915, %r1078;
BB6_255:
mul.rn.f32 %f202, %f915, %f915;
and.b32 %r386, %r1429, 1;
setp.eq.s32 %p149, %r386, 0;
@%p149 bra BB6_257;
mov.f32 %f599, 0fBAB6061A;
mov.f32 %f600, 0f37CCF5CE;
fma.rn.f32 %f916, %f600, %f202, %f599;
bra.uni BB6_258;
BB6_257:
mov.f32 %f601, 0f3C08839E;
mov.f32 %f602, 0fB94CA1F9;
fma.rn.f32 %f916, %f602, %f202, %f601;
BB6_258:
@%p149 bra BB6_260;
mov.f32 %f603, 0f3D2AAAA5;
fma.rn.f32 %f604, %f916, %f202, %f603;
mov.f32 %f605, 0fBF000000;
fma.rn.f32 %f917, %f604, %f202, %f605;
bra.uni BB6_261;
BB6_260:
mov.f32 %f606, 0fBE2AAAA3;
fma.rn.f32 %f607, %f916, %f202, %f606;
mov.f32 %f608, 0f00000000;
fma.rn.f32 %f917, %f607, %f202, %f608;
BB6_261:
fma.rn.f32 %f918, %f917, %f915, %f915;
@%p149 bra BB6_263;
mov.f32 %f609, 0f3F800000;
fma.rn.f32 %f918, %f917, %f202, %f609;
BB6_263:
and.b32 %r1079, %r1429, 2;
setp.eq.s32 %p152, %r1079, 0;
@%p152 bra BB6_265;
mov.f32 %f610, 0f00000000;
mov.f32 %f611, 0fBF800000;
fma.rn.f32 %f918, %f918, %f611, %f610;
BB6_265:
mul.f32 %f920, %f157, 0f3F000000;
abs.f32 %f612, %f920;
setp.neu.f32 %p153, %f612, 0f7F800000;
@%p153 bra BB6_267;
mov.f32 %f613, 0f00000000;
mul.rn.f32 %f920, %f920, %f613;
BB6_267:
mul.f32 %f614, %f920, 0f3F22F983;
cvt.rni.s32.f32 %r1439, %f614;
cvt.rn.f32.s32 %f615, %r1439;
neg.f32 %f616, %f615;
fma.rn.f32 %f618, %f616, %f593, %f920;
fma.rn.f32 %f620, %f616, %f595, %f618;
fma.rn.f32 %f921, %f616, %f597, %f620;
abs.f32 %f622, %f920;
setp.leu.f32 %p154, %f622, 0f47CE4780;
@%p154 bra BB6_278;
mov.b32 %r388, %f920;
shr.u32 %r389, %r388, 23;
shl.b32 %r1082, %r388, 8;
or.b32 %r390, %r1082, -2147483648;
mov.u32 %r1431, 0;
mov.u64 %rd169, __cudart_i2opi_f;
mov.u32 %r1430, -6;
mov.u64 %rd170, %rd61;
BB6_269:
.pragma "nounroll";
ld.const.u32 %r1085, [%rd169];
// inline asm
{
mad.lo.cc.u32 %r1083, %r1085, %r390, %r1431;
madc.hi.u32 %r1431, %r1085, %r390, 0;
}
// inline asm
st.local.u32 [%rd170], %r1083;
add.s64 %rd170, %rd170, 4;
add.s64 %rd169, %rd169, 4;
add.s32 %r1430, %r1430, 1;
setp.ne.s32 %p155, %r1430, 0;
@%p155 bra BB6_269;
and.b32 %r1088, %r389, 255;
add.s32 %r1089, %r1088, -128;
shr.u32 %r1090, %r1089, 5;
and.b32 %r395, %r388, -2147483648;
st.local.u32 [%rd62], %r1431;
mov.u32 %r1091, 6;
sub.s32 %r1092, %r1091, %r1090;
mul.wide.s32 %rd133, %r1092, 4;
add.s64 %rd72, %rd61, %rd133;
ld.local.u32 %r1432, [%rd72];
ld.local.u32 %r1433, [%rd72+-4];
and.b32 %r398, %r389, 31;
setp.eq.s32 %p156, %r398, 0;
@%p156 bra BB6_272;
mov.u32 %r1093, 32;
sub.s32 %r1094, %r1093, %r398;
shr.u32 %r1095, %r1433, %r1094;
shl.b32 %r1096, %r1432, %r398;
add.s32 %r1432, %r1095, %r1096;
ld.local.u32 %r1097, [%rd72+-8];
shr.u32 %r1098, %r1097, %r1094;
shl.b32 %r1099, %r1433, %r398;
add.s32 %r1433, %r1098, %r1099;
BB6_272:
shr.u32 %r1100, %r1433, 30;
shl.b32 %r1101, %r1432, 2;
add.s32 %r1434, %r1100, %r1101;
shl.b32 %r404, %r1433, 2;
shr.u32 %r1102, %r1434, 31;
shr.u32 %r1103, %r1432, 30;
add.s32 %r405, %r1102, %r1103;
setp.eq.s32 %p157, %r1102, 0;
@%p157 bra BB6_273;
not.b32 %r1104, %r1434;
neg.s32 %r1436, %r404;
setp.eq.s32 %p158, %r404, 0;
selp.u32 %r1105, 1, 0, %p158;
add.s32 %r1434, %r1105, %r1104;
xor.b32 %r1435, %r395, -2147483648;
bra.uni BB6_275;
BB6_273:
mov.u32 %r1435, %r395;
mov.u32 %r1436, %r404;
BB6_275:
clz.b32 %r1438, %r1434;
setp.eq.s32 %p159, %r1438, 0;
shl.b32 %r1106, %r1434, %r1438;
mov.u32 %r1107, 32;
sub.s32 %r1108, %r1107, %r1438;
shr.u32 %r1109, %r1436, %r1108;
add.s32 %r1110, %r1109, %r1106;
selp.b32 %r413, %r1434, %r1110, %p159;
mov.u32 %r1111, -921707870;
mul.hi.u32 %r1437, %r413, %r1111;
setp.eq.s32 %p160, %r395, 0;
neg.s32 %r1112, %r405;
selp.b32 %r1439, %r405, %r1112, %p160;
setp.lt.s32 %p161, %r1437, 1;
@%p161 bra BB6_277;
mul.lo.s32 %r1113, %r413, -921707870;
shr.u32 %r1114, %r1113, 31;
shl.b32 %r1115, %r1437, 1;
add.s32 %r1437, %r1114, %r1115;
add.s32 %r1438, %r1438, 1;
BB6_277:
mov.u32 %r1116, 126;
sub.s32 %r1117, %r1116, %r1438;
shl.b32 %r1118, %r1117, 23;
add.s32 %r1119, %r1437, 1;
shr.u32 %r1120, %r1119, 7;
add.s32 %r1121, %r1120, 1;
shr.u32 %r1122, %r1121, 1;
add.s32 %r1123, %r1122, %r1118;
or.b32 %r1124, %r1123, %r1435;
mov.b32 %f921, %r1124;
BB6_278:
mul.rn.f32 %f220, %f921, %f921;
and.b32 %r421, %r1439, 1;
setp.eq.s32 %p162, %r421, 0;
@%p162 bra BB6_280;
mov.f32 %f623, 0fBAB6061A;
mov.f32 %f624, 0f37CCF5CE;
fma.rn.f32 %f922, %f624, %f220, %f623;
bra.uni BB6_281;
BB6_280:
mov.f32 %f625, 0f3C08839E;
mov.f32 %f626, 0fB94CA1F9;
fma.rn.f32 %f922, %f626, %f220, %f625;
BB6_281:
@%p162 bra BB6_283;
mov.f32 %f627, 0f3D2AAAA5;
fma.rn.f32 %f628, %f922, %f220, %f627;
mov.f32 %f629, 0fBF000000;
fma.rn.f32 %f923, %f628, %f220, %f629;
bra.uni BB6_284;
BB6_283:
mov.f32 %f630, 0fBE2AAAA3;
fma.rn.f32 %f631, %f922, %f220, %f630;
mov.f32 %f632, 0f00000000;
fma.rn.f32 %f923, %f631, %f220, %f632;
BB6_284:
fma.rn.f32 %f924, %f923, %f921, %f921;
@%p162 bra BB6_286;
mov.f32 %f633, 0f3F800000;
fma.rn.f32 %f924, %f923, %f220, %f633;
BB6_286:
and.b32 %r1125, %r1439, 2;
setp.eq.s32 %p165, %r1125, 0;
@%p165 bra BB6_288;
mov.f32 %f634, 0f00000000;
mov.f32 %f635, 0fBF800000;
fma.rn.f32 %f924, %f924, %f635, %f634;
BB6_288:
mul.f32 %f636, %f157, %f157;
mul.f32 %f637, %f636, 0f3F000000;
mul.f32 %f638, %f918, %f924;
div.rn.f32 %f926, %f638, %f637;
BB6_289:
setp.eq.f32 %p166, %f158, 0f00000000;
mov.f32 %f939, %f952;
@%p166 bra BB6_337;
add.u64 %rd73, %SPL, 0;
abs.f32 %f640, %f158;
setp.neu.f32 %p167, %f640, 0f7F800000;
mov.f32 %f927, %f158;
@%p167 bra BB6_292;
mov.f32 %f641, 0f00000000;
mul.rn.f32 %f927, %f158, %f641;
BB6_292:
mul.f32 %f642, %f927, 0f3F22F983;
cvt.rni.s32.f32 %r1449, %f642;
cvt.rn.f32.s32 %f643, %r1449;
neg.f32 %f644, %f643;
mov.f32 %f645, 0f3FC90FDA;
fma.rn.f32 %f646, %f644, %f645, %f927;
mov.f32 %f647, 0f33A22168;
fma.rn.f32 %f648, %f644, %f647, %f646;
mov.f32 %f649, 0f27C234C5;
fma.rn.f32 %f928, %f644, %f649, %f648;
abs.f32 %f650, %f927;
add.s64 %rd74, %rd73, 24;
setp.leu.f32 %p168, %f650, 0f47CE4780;
@%p168 bra BB6_303;
mov.b32 %r423, %f927;
shr.u32 %r424, %r423, 23;
shl.b32 %r1128, %r423, 8;
or.b32 %r425, %r1128, -2147483648;
mov.u32 %r1441, 0;
mov.u64 %rd171, __cudart_i2opi_f;
mov.u32 %r1440, -6;
mov.u64 %rd172, %rd73;
BB6_294:
.pragma "nounroll";
ld.const.u32 %r1131, [%rd171];
// inline asm
{
mad.lo.cc.u32 %r1129, %r1131, %r425, %r1441;
madc.hi.u32 %r1441, %r1131, %r425, 0;
}
// inline asm
st.local.u32 [%rd172], %r1129;
add.s64 %rd172, %rd172, 4;
add.s64 %rd171, %rd171, 4;
add.s32 %r1440, %r1440, 1;
setp.ne.s32 %p169, %r1440, 0;
@%p169 bra BB6_294;
and.b32 %r1134, %r424, 255;
add.s32 %r1135, %r1134, -128;
shr.u32 %r1136, %r1135, 5;
and.b32 %r430, %r423, -2147483648;
st.local.u32 [%rd74], %r1441;
mov.u32 %r1137, 6;
sub.s32 %r1138, %r1137, %r1136;
mul.wide.s32 %rd136, %r1138, 4;
add.s64 %rd79, %rd73, %rd136;
ld.local.u32 %r1442, [%rd79];
ld.local.u32 %r1443, [%rd79+-4];
and.b32 %r433, %r424, 31;
setp.eq.s32 %p170, %r433, 0;
@%p170 bra BB6_297;
mov.u32 %r1139, 32;
sub.s32 %r1140, %r1139, %r433;
shr.u32 %r1141, %r1443, %r1140;
shl.b32 %r1142, %r1442, %r433;
add.s32 %r1442, %r1141, %r1142;
ld.local.u32 %r1143, [%rd79+-8];
shr.u32 %r1144, %r1143, %r1140;
shl.b32 %r1145, %r1443, %r433;
add.s32 %r1443, %r1144, %r1145;
BB6_297:
shr.u32 %r1146, %r1443, 30;
shl.b32 %r1147, %r1442, 2;
add.s32 %r1444, %r1146, %r1147;
shl.b32 %r439, %r1443, 2;
shr.u32 %r1148, %r1444, 31;
shr.u32 %r1149, %r1442, 30;
add.s32 %r440, %r1148, %r1149;
setp.eq.s32 %p171, %r1148, 0;
@%p171 bra BB6_298;
not.b32 %r1150, %r1444;
neg.s32 %r1446, %r439;
setp.eq.s32 %p172, %r439, 0;
selp.u32 %r1151, 1, 0, %p172;
add.s32 %r1444, %r1151, %r1150;
xor.b32 %r1445, %r430, -2147483648;
bra.uni BB6_300;
BB6_298:
mov.u32 %r1445, %r430;
mov.u32 %r1446, %r439;
BB6_300:
clz.b32 %r1448, %r1444;
setp.eq.s32 %p173, %r1448, 0;
shl.b32 %r1152, %r1444, %r1448;
mov.u32 %r1153, 32;
sub.s32 %r1154, %r1153, %r1448;
shr.u32 %r1155, %r1446, %r1154;
add.s32 %r1156, %r1155, %r1152;
selp.b32 %r448, %r1444, %r1156, %p173;
mov.u32 %r1157, -921707870;
mul.hi.u32 %r1447, %r448, %r1157;
setp.eq.s32 %p174, %r430, 0;
neg.s32 %r1158, %r440;
selp.b32 %r1449, %r440, %r1158, %p174;
setp.lt.s32 %p175, %r1447, 1;
@%p175 bra BB6_302;
mul.lo.s32 %r1159, %r448, -921707870;
shr.u32 %r1160, %r1159, 31;
shl.b32 %r1161, %r1447, 1;
add.s32 %r1447, %r1160, %r1161;
add.s32 %r1448, %r1448, 1;
BB6_302:
mov.u32 %r1162, 126;
sub.s32 %r1163, %r1162, %r1448;
shl.b32 %r1164, %r1163, 23;
add.s32 %r1165, %r1447, 1;
shr.u32 %r1166, %r1165, 7;
add.s32 %r1167, %r1166, 1;
shr.u32 %r1168, %r1167, 1;
add.s32 %r1169, %r1168, %r1164;
or.b32 %r1170, %r1169, %r1445;
mov.b32 %f928, %r1170;
BB6_303:
mul.rn.f32 %f239, %f928, %f928;
and.b32 %r456, %r1449, 1;
setp.eq.s32 %p176, %r456, 0;
@%p176 bra BB6_305;
mov.f32 %f651, 0fBAB6061A;
mov.f32 %f652, 0f37CCF5CE;
fma.rn.f32 %f929, %f652, %f239, %f651;
bra.uni BB6_306;
BB6_305:
mov.f32 %f653, 0f3C08839E;
mov.f32 %f654, 0fB94CA1F9;
fma.rn.f32 %f929, %f654, %f239, %f653;
BB6_306:
@%p176 bra BB6_308;
mov.f32 %f655, 0f3D2AAAA5;
fma.rn.f32 %f656, %f929, %f239, %f655;
mov.f32 %f657, 0fBF000000;
fma.rn.f32 %f930, %f656, %f239, %f657;
bra.uni BB6_309;
BB6_308:
mov.f32 %f658, 0fBE2AAAA3;
fma.rn.f32 %f659, %f929, %f239, %f658;
mov.f32 %f660, 0f00000000;
fma.rn.f32 %f930, %f659, %f239, %f660;
BB6_309:
fma.rn.f32 %f931, %f930, %f928, %f928;
@%p176 bra BB6_311;
mov.f32 %f661, 0f3F800000;
fma.rn.f32 %f931, %f930, %f239, %f661;
BB6_311:
and.b32 %r1171, %r1449, 2;
setp.eq.s32 %p179, %r1171, 0;
@%p179 bra BB6_313;
mov.f32 %f662, 0f00000000;
mov.f32 %f663, 0fBF800000;
fma.rn.f32 %f931, %f931, %f663, %f662;
BB6_313:
mul.f32 %f933, %f158, 0f3F000000;
abs.f32 %f664, %f933;
setp.neu.f32 %p180, %f664, 0f7F800000;
@%p180 bra BB6_315;
mov.f32 %f665, 0f00000000;
mul.rn.f32 %f933, %f933, %f665;
BB6_315:
mul.f32 %f666, %f933, 0f3F22F983;
cvt.rni.s32.f32 %r1459, %f666;
cvt.rn.f32.s32 %f667, %r1459;
neg.f32 %f668, %f667;
fma.rn.f32 %f670, %f668, %f645, %f933;
fma.rn.f32 %f672, %f668, %f647, %f670;
fma.rn.f32 %f934, %f668, %f649, %f672;
abs.f32 %f674, %f933;
setp.leu.f32 %p181, %f674, 0f47CE4780;
@%p181 bra BB6_326;
mov.b32 %r458, %f933;
shr.u32 %r459, %r458, 23;
shl.b32 %r1174, %r458, 8;
or.b32 %r460, %r1174, -2147483648;
mov.u32 %r1451, 0;
mov.u64 %rd173, __cudart_i2opi_f;
mov.u32 %r1450, -6;
mov.u64 %rd174, %rd73;
BB6_317:
.pragma "nounroll";
ld.const.u32 %r1177, [%rd173];
// inline asm
{
mad.lo.cc.u32 %r1175, %r1177, %r460, %r1451;
madc.hi.u32 %r1451, %r1177, %r460, 0;
}
// inline asm
st.local.u32 [%rd174], %r1175;
add.s64 %rd174, %rd174, 4;
add.s64 %rd173, %rd173, 4;
add.s32 %r1450, %r1450, 1;
setp.ne.s32 %p182, %r1450, 0;
@%p182 bra BB6_317;
and.b32 %r1180, %r459, 255;
add.s32 %r1181, %r1180, -128;
shr.u32 %r1182, %r1181, 5;
and.b32 %r465, %r458, -2147483648;
st.local.u32 [%rd74], %r1451;
mov.u32 %r1183, 6;
sub.s32 %r1184, %r1183, %r1182;
mul.wide.s32 %rd138, %r1184, 4;
add.s64 %rd84, %rd73, %rd138;
ld.local.u32 %r1452, [%rd84];
ld.local.u32 %r1453, [%rd84+-4];
and.b32 %r468, %r459, 31;
setp.eq.s32 %p183, %r468, 0;
@%p183 bra BB6_320;
mov.u32 %r1185, 32;
sub.s32 %r1186, %r1185, %r468;
shr.u32 %r1187, %r1453, %r1186;
shl.b32 %r1188, %r1452, %r468;
add.s32 %r1452, %r1187, %r1188;
ld.local.u32 %r1189, [%rd84+-8];
shr.u32 %r1190, %r1189, %r1186;
shl.b32 %r1191, %r1453, %r468;
add.s32 %r1453, %r1190, %r1191;
BB6_320:
shr.u32 %r1192, %r1453, 30;
shl.b32 %r1193, %r1452, 2;
add.s32 %r1454, %r1192, %r1193;
shl.b32 %r474, %r1453, 2;
shr.u32 %r1194, %r1454, 31;
shr.u32 %r1195, %r1452, 30;
add.s32 %r475, %r1194, %r1195;
setp.eq.s32 %p184, %r1194, 0;
@%p184 bra BB6_321;
not.b32 %r1196, %r1454;
neg.s32 %r1456, %r474;
setp.eq.s32 %p185, %r474, 0;
selp.u32 %r1197, 1, 0, %p185;
add.s32 %r1454, %r1197, %r1196;
xor.b32 %r1455, %r465, -2147483648;
bra.uni BB6_323;
BB6_321:
mov.u32 %r1455, %r465;
mov.u32 %r1456, %r474;
BB6_323:
clz.b32 %r1458, %r1454;
setp.eq.s32 %p186, %r1458, 0;
shl.b32 %r1198, %r1454, %r1458;
mov.u32 %r1199, 32;
sub.s32 %r1200, %r1199, %r1458;
shr.u32 %r1201, %r1456, %r1200;
add.s32 %r1202, %r1201, %r1198;
selp.b32 %r483, %r1454, %r1202, %p186;
mov.u32 %r1203, -921707870;
mul.hi.u32 %r1457, %r483, %r1203;
setp.eq.s32 %p187, %r465, 0;
neg.s32 %r1204, %r475;
selp.b32 %r1459, %r475, %r1204, %p187;
setp.lt.s32 %p188, %r1457, 1;
@%p188 bra BB6_325;
mul.lo.s32 %r1205, %r483, -921707870;
shr.u32 %r1206, %r1205, 31;
shl.b32 %r1207, %r1457, 1;
add.s32 %r1457, %r1206, %r1207;
add.s32 %r1458, %r1458, 1;
BB6_325:
mov.u32 %r1208, 126;
sub.s32 %r1209, %r1208, %r1458;
shl.b32 %r1210, %r1209, 23;
add.s32 %r1211, %r1457, 1;
shr.u32 %r1212, %r1211, 7;
add.s32 %r1213, %r1212, 1;
shr.u32 %r1214, %r1213, 1;
add.s32 %r1215, %r1214, %r1210;
or.b32 %r1216, %r1215, %r1455;
mov.b32 %f934, %r1216;
BB6_326:
mul.rn.f32 %f257, %f934, %f934;
and.b32 %r491, %r1459, 1;
setp.eq.s32 %p189, %r491, 0;
@%p189 bra BB6_328;
mov.f32 %f675, 0fBAB6061A;
mov.f32 %f676, 0f37CCF5CE;
fma.rn.f32 %f935, %f676, %f257, %f675;
bra.uni BB6_329;
BB6_328:
mov.f32 %f677, 0f3C08839E;
mov.f32 %f678, 0fB94CA1F9;
fma.rn.f32 %f935, %f678, %f257, %f677;
BB6_329:
@%p189 bra BB6_331;
mov.f32 %f679, 0f3D2AAAA5;
fma.rn.f32 %f680, %f935, %f257, %f679;
mov.f32 %f681, 0fBF000000;
fma.rn.f32 %f936, %f680, %f257, %f681;
bra.uni BB6_332;
BB6_331:
mov.f32 %f682, 0fBE2AAAA3;
fma.rn.f32 %f683, %f935, %f257, %f682;
mov.f32 %f684, 0f00000000;
fma.rn.f32 %f936, %f683, %f257, %f684;
BB6_332:
fma.rn.f32 %f937, %f936, %f934, %f934;
@%p189 bra BB6_334;
mov.f32 %f685, 0f3F800000;
fma.rn.f32 %f937, %f936, %f257, %f685;
BB6_334:
and.b32 %r1217, %r1459, 2;
setp.eq.s32 %p192, %r1217, 0;
@%p192 bra BB6_336;
mov.f32 %f686, 0f00000000;
mov.f32 %f687, 0fBF800000;
fma.rn.f32 %f937, %f937, %f687, %f686;
BB6_336:
mul.f32 %f688, %f158, %f158;
mul.f32 %f689, %f688, 0f3F000000;
mul.f32 %f690, %f931, %f937;
div.rn.f32 %f939, %f690, %f689;
BB6_337:
setp.eq.f32 %p193, %f159, 0f00000000;
@%p193 bra BB6_385;
add.u64 %rd85, %SPL, 0;
abs.f32 %f692, %f159;
setp.neu.f32 %p194, %f692, 0f7F800000;
mov.f32 %f940, %f159;
@%p194 bra BB6_340;
mov.f32 %f693, 0f00000000;
mul.rn.f32 %f940, %f159, %f693;
BB6_340:
mul.f32 %f694, %f940, 0f3F22F983;
cvt.rni.s32.f32 %r1469, %f694;
cvt.rn.f32.s32 %f695, %r1469;
neg.f32 %f696, %f695;
mov.f32 %f697, 0f3FC90FDA;
fma.rn.f32 %f698, %f696, %f697, %f940;
mov.f32 %f699, 0f33A22168;
fma.rn.f32 %f700, %f696, %f699, %f698;
mov.f32 %f701, 0f27C234C5;
fma.rn.f32 %f941, %f696, %f701, %f700;
abs.f32 %f702, %f940;
add.s64 %rd86, %rd85, 24;
setp.leu.f32 %p195, %f702, 0f47CE4780;
@%p195 bra BB6_351;
mov.b32 %r493, %f940;
shr.u32 %r494, %r493, 23;
shl.b32 %r1220, %r493, 8;
or.b32 %r495, %r1220, -2147483648;
mov.u32 %r1461, 0;
mov.u64 %rd175, __cudart_i2opi_f;
mov.u32 %r1460, -6;
mov.u64 %rd176, %rd85;
BB6_342:
.pragma "nounroll";
ld.const.u32 %r1223, [%rd175];
// inline asm
{
mad.lo.cc.u32 %r1221, %r1223, %r495, %r1461;
madc.hi.u32 %r1461, %r1223, %r495, 0;
}
// inline asm
st.local.u32 [%rd176], %r1221;
add.s64 %rd176, %rd176, 4;
add.s64 %rd175, %rd175, 4;
add.s32 %r1460, %r1460, 1;
setp.ne.s32 %p196, %r1460, 0;
@%p196 bra BB6_342;
and.b32 %r1226, %r494, 255;
add.s32 %r1227, %r1226, -128;
shr.u32 %r1228, %r1227, 5;
and.b32 %r500, %r493, -2147483648;
st.local.u32 [%rd86], %r1461;
mov.u32 %r1229, 6;
sub.s32 %r1230, %r1229, %r1228;
mul.wide.s32 %rd141, %r1230, 4;
add.s64 %rd91, %rd85, %rd141;
ld.local.u32 %r1462, [%rd91];
ld.local.u32 %r1463, [%rd91+-4];
and.b32 %r503, %r494, 31;
setp.eq.s32 %p197, %r503, 0;
@%p197 bra BB6_345;
mov.u32 %r1231, 32;
sub.s32 %r1232, %r1231, %r503;
shr.u32 %r1233, %r1463, %r1232;
shl.b32 %r1234, %r1462, %r503;
add.s32 %r1462, %r1233, %r1234;
ld.local.u32 %r1235, [%rd91+-8];
shr.u32 %r1236, %r1235, %r1232;
shl.b32 %r1237, %r1463, %r503;
add.s32 %r1463, %r1236, %r1237;
BB6_345:
shr.u32 %r1238, %r1463, 30;
shl.b32 %r1239, %r1462, 2;
add.s32 %r1464, %r1238, %r1239;
shl.b32 %r509, %r1463, 2;
shr.u32 %r1240, %r1464, 31;
shr.u32 %r1241, %r1462, 30;
add.s32 %r510, %r1240, %r1241;
setp.eq.s32 %p198, %r1240, 0;
@%p198 bra BB6_346;
not.b32 %r1242, %r1464;
neg.s32 %r1466, %r509;
setp.eq.s32 %p199, %r509, 0;
selp.u32 %r1243, 1, 0, %p199;
add.s32 %r1464, %r1243, %r1242;
xor.b32 %r1465, %r500, -2147483648;
bra.uni BB6_348;
BB6_346:
mov.u32 %r1465, %r500;
mov.u32 %r1466, %r509;
BB6_348:
clz.b32 %r1468, %r1464;
setp.eq.s32 %p200, %r1468, 0;
shl.b32 %r1244, %r1464, %r1468;
mov.u32 %r1245, 32;
sub.s32 %r1246, %r1245, %r1468;
shr.u32 %r1247, %r1466, %r1246;
add.s32 %r1248, %r1247, %r1244;
selp.b32 %r518, %r1464, %r1248, %p200;
mov.u32 %r1249, -921707870;
mul.hi.u32 %r1467, %r518, %r1249;
setp.eq.s32 %p201, %r500, 0;
neg.s32 %r1250, %r510;
selp.b32 %r1469, %r510, %r1250, %p201;
setp.lt.s32 %p202, %r1467, 1;
@%p202 bra BB6_350;
mul.lo.s32 %r1251, %r518, -921707870;
shr.u32 %r1252, %r1251, 31;
shl.b32 %r1253, %r1467, 1;
add.s32 %r1467, %r1252, %r1253;
add.s32 %r1468, %r1468, 1;
BB6_350:
mov.u32 %r1254, 126;
sub.s32 %r1255, %r1254, %r1468;
shl.b32 %r1256, %r1255, 23;
add.s32 %r1257, %r1467, 1;
shr.u32 %r1258, %r1257, 7;
add.s32 %r1259, %r1258, 1;
shr.u32 %r1260, %r1259, 1;
add.s32 %r1261, %r1260, %r1256;
or.b32 %r1262, %r1261, %r1465;
mov.b32 %f941, %r1262;
BB6_351:
mul.rn.f32 %f276, %f941, %f941;
and.b32 %r526, %r1469, 1;
setp.eq.s32 %p203, %r526, 0;
@%p203 bra BB6_353;
mov.f32 %f703, 0fBAB6061A;
mov.f32 %f704, 0f37CCF5CE;
fma.rn.f32 %f942, %f704, %f276, %f703;
bra.uni BB6_354;
BB6_353:
mov.f32 %f705, 0f3C08839E;
mov.f32 %f706, 0fB94CA1F9;
fma.rn.f32 %f942, %f706, %f276, %f705;
BB6_354:
@%p203 bra BB6_356;
mov.f32 %f707, 0f3D2AAAA5;
fma.rn.f32 %f708, %f942, %f276, %f707;
mov.f32 %f709, 0fBF000000;
fma.rn.f32 %f943, %f708, %f276, %f709;
bra.uni BB6_357;
BB6_356:
mov.f32 %f710, 0fBE2AAAA3;
fma.rn.f32 %f711, %f942, %f276, %f710;
mov.f32 %f712, 0f00000000;
fma.rn.f32 %f943, %f711, %f276, %f712;
BB6_357:
fma.rn.f32 %f944, %f943, %f941, %f941;
@%p203 bra BB6_359;
mov.f32 %f713, 0f3F800000;
fma.rn.f32 %f944, %f943, %f276, %f713;
BB6_359:
and.b32 %r1263, %r1469, 2;
setp.eq.s32 %p206, %r1263, 0;
@%p206 bra BB6_361;
mov.f32 %f714, 0f00000000;
mov.f32 %f715, 0fBF800000;
fma.rn.f32 %f944, %f944, %f715, %f714;
BB6_361:
mul.f32 %f946, %f159, 0f3F000000;
abs.f32 %f716, %f946;
setp.neu.f32 %p207, %f716, 0f7F800000;
@%p207 bra BB6_363;
mov.f32 %f717, 0f00000000;
mul.rn.f32 %f946, %f946, %f717;
BB6_363:
mul.f32 %f718, %f946, 0f3F22F983;
cvt.rni.s32.f32 %r1479, %f718;
cvt.rn.f32.s32 %f719, %r1479;
neg.f32 %f720, %f719;
fma.rn.f32 %f722, %f720, %f697, %f946;
fma.rn.f32 %f724, %f720, %f699, %f722;
fma.rn.f32 %f947, %f720, %f701, %f724;
abs.f32 %f726, %f946;
setp.leu.f32 %p208, %f726, 0f47CE4780;
@%p208 bra BB6_374;
mov.b32 %r528, %f946;
shr.u32 %r529, %r528, 23;
shl.b32 %r1266, %r528, 8;
or.b32 %r530, %r1266, -2147483648;
mov.u32 %r1471, 0;
mov.u64 %rd177, __cudart_i2opi_f;
mov.u32 %r1470, -6;
mov.u64 %rd178, %rd85;
BB6_365:
.pragma "nounroll";
ld.const.u32 %r1269, [%rd177];
// inline asm
{
mad.lo.cc.u32 %r1267, %r1269, %r530, %r1471;
madc.hi.u32 %r1471, %r1269, %r530, 0;
}
// inline asm
st.local.u32 [%rd178], %r1267;
add.s64 %rd178, %rd178, 4;
add.s64 %rd177, %rd177, 4;
add.s32 %r1470, %r1470, 1;
setp.ne.s32 %p209, %r1470, 0;
@%p209 bra BB6_365;
and.b32 %r1272, %r529, 255;
add.s32 %r1273, %r1272, -128;
shr.u32 %r1274, %r1273, 5;
and.b32 %r535, %r528, -2147483648;
st.local.u32 [%rd86], %r1471;
mov.u32 %r1275, 6;
sub.s32 %r1276, %r1275, %r1274;
mul.wide.s32 %rd143, %r1276, 4;
add.s64 %rd96, %rd85, %rd143;
ld.local.u32 %r1472, [%rd96];
ld.local.u32 %r1473, [%rd96+-4];
and.b32 %r538, %r529, 31;
setp.eq.s32 %p210, %r538, 0;
@%p210 bra BB6_368;
mov.u32 %r1277, 32;
sub.s32 %r1278, %r1277, %r538;
shr.u32 %r1279, %r1473, %r1278;
shl.b32 %r1280, %r1472, %r538;
add.s32 %r1472, %r1279, %r1280;
ld.local.u32 %r1281, [%rd96+-8];
shr.u32 %r1282, %r1281, %r1278;
shl.b32 %r1283, %r1473, %r538;
add.s32 %r1473, %r1282, %r1283;
BB6_368:
shr.u32 %r1284, %r1473, 30;
shl.b32 %r1285, %r1472, 2;
add.s32 %r1474, %r1284, %r1285;
shl.b32 %r544, %r1473, 2;
shr.u32 %r1286, %r1474, 31;
shr.u32 %r1287, %r1472, 30;
add.s32 %r545, %r1286, %r1287;
setp.eq.s32 %p211, %r1286, 0;
@%p211 bra BB6_369;
not.b32 %r1288, %r1474;
neg.s32 %r1476, %r544;
setp.eq.s32 %p212, %r544, 0;
selp.u32 %r1289, 1, 0, %p212;
add.s32 %r1474, %r1289, %r1288;
xor.b32 %r1475, %r535, -2147483648;
bra.uni BB6_371;
BB6_369:
mov.u32 %r1475, %r535;
mov.u32 %r1476, %r544;
BB6_371:
clz.b32 %r1478, %r1474;
setp.eq.s32 %p213, %r1478, 0;
shl.b32 %r1290, %r1474, %r1478;
mov.u32 %r1291, 32;
sub.s32 %r1292, %r1291, %r1478;
shr.u32 %r1293, %r1476, %r1292;
add.s32 %r1294, %r1293, %r1290;
selp.b32 %r553, %r1474, %r1294, %p213;
mov.u32 %r1295, -921707870;
mul.hi.u32 %r1477, %r553, %r1295;
setp.eq.s32 %p214, %r535, 0;
neg.s32 %r1296, %r545;
selp.b32 %r1479, %r545, %r1296, %p214;
setp.lt.s32 %p215, %r1477, 1;
@%p215 bra BB6_373;
mul.lo.s32 %r1297, %r553, -921707870;
shr.u32 %r1298, %r1297, 31;
shl.b32 %r1299, %r1477, 1;
add.s32 %r1477, %r1298, %r1299;
add.s32 %r1478, %r1478, 1;
BB6_373:
mov.u32 %r1300, 126;
sub.s32 %r1301, %r1300, %r1478;
shl.b32 %r1302, %r1301, 23;
add.s32 %r1303, %r1477, 1;
shr.u32 %r1304, %r1303, 7;
add.s32 %r1305, %r1304, 1;
shr.u32 %r1306, %r1305, 1;
add.s32 %r1307, %r1306, %r1302;
or.b32 %r1308, %r1307, %r1475;
mov.b32 %f947, %r1308;
BB6_374:
mul.rn.f32 %f294, %f947, %f947;
and.b32 %r561, %r1479, 1;
setp.eq.s32 %p216, %r561, 0;
@%p216 bra BB6_376;
mov.f32 %f727, 0fBAB6061A;
mov.f32 %f728, 0f37CCF5CE;
fma.rn.f32 %f948, %f728, %f294, %f727;
bra.uni BB6_377;
BB6_376:
mov.f32 %f729, 0f3C08839E;
mov.f32 %f730, 0fB94CA1F9;
fma.rn.f32 %f948, %f730, %f294, %f729;
BB6_377:
@%p216 bra BB6_379;
mov.f32 %f731, 0f3D2AAAA5;
fma.rn.f32 %f732, %f948, %f294, %f731;
mov.f32 %f733, 0fBF000000;
fma.rn.f32 %f949, %f732, %f294, %f733;
bra.uni BB6_380;
BB6_379:
mov.f32 %f734, 0fBE2AAAA3;
fma.rn.f32 %f735, %f948, %f294, %f734;
mov.f32 %f736, 0f00000000;
fma.rn.f32 %f949, %f735, %f294, %f736;
BB6_380:
fma.rn.f32 %f950, %f949, %f947, %f947;
@%p216 bra BB6_382;
mov.f32 %f737, 0f3F800000;
fma.rn.f32 %f950, %f949, %f294, %f737;
BB6_382:
and.b32 %r1309, %r1479, 2;
setp.eq.s32 %p219, %r1309, 0;
@%p219 bra BB6_384;
mov.f32 %f738, 0f00000000;
mov.f32 %f739, 0fBF800000;
fma.rn.f32 %f950, %f950, %f739, %f738;
BB6_384:
mul.f32 %f740, %f159, %f159;
mul.f32 %f741, %f740, 0f3F000000;
mul.f32 %f742, %f944, %f950;
div.rn.f32 %f952, %f742, %f741;
BB6_385:
add.f32 %f743, %f861, %f874;
add.f32 %f744, %f743, %f887;
add.f32 %f745, %f744, %f900;
div.rn.f32 %f746, %f861, %f745;
div.rn.f32 %f747, %f874, %f745;
div.rn.f32 %f748, %f887, %f745;
div.rn.f32 %f749, %f900, %f745;
add.f32 %f750, %f3, 0fBF800000;
add.f32 %f751, %f2, 0fBF800000;
tex.2d.v4.f32.f32 {%f752, %f753, %f754, %f755}, [%rd97, {%f751, %f750}];
tex.2d.v4.f32.f32 {%f756, %f757, %f758, %f759}, [%rd97, {%f2, %f750}];
add.f32 %f760, %f2, 0f3F800000;
tex.2d.v4.f32.f32 {%f761, %f762, %f763, %f764}, [%rd97, {%f760, %f750}];
add.f32 %f765, %f2, 0f40000000;
tex.2d.v4.f32.f32 {%f766, %f767, %f768, %f769}, [%rd97, {%f765, %f750}];
mul.f32 %f770, %f747, %f756;
fma.rn.f32 %f771, %f746, %f752, %f770;
fma.rn.f32 %f772, %f748, %f761, %f771;
fma.rn.f32 %f773, %f749, %f766, %f772;
tex.2d.v4.f32.f32 {%f774, %f775, %f776, %f777}, [%rd97, {%f751, %f3}];
tex.2d.v4.f32.f32 {%f778, %f779, %f780, %f781}, [%rd97, {%f2, %f3}];
tex.2d.v4.f32.f32 {%f782, %f783, %f784, %f785}, [%rd97, {%f760, %f3}];
tex.2d.v4.f32.f32 {%f786, %f787, %f788, %f789}, [%rd97, {%f765, %f3}];
mul.f32 %f790, %f747, %f778;
fma.rn.f32 %f791, %f746, %f774, %f790;
fma.rn.f32 %f792, %f748, %f782, %f791;
fma.rn.f32 %f793, %f749, %f786, %f792;
add.f32 %f794, %f3, 0f3F800000;
tex.2d.v4.f32.f32 {%f795, %f796, %f797, %f798}, [%rd97, {%f751, %f794}];
tex.2d.v4.f32.f32 {%f799, %f800, %f801, %f802}, [%rd97, {%f2, %f794}];
tex.2d.v4.f32.f32 {%f803, %f804, %f805, %f806}, [%rd97, {%f760, %f794}];
tex.2d.v4.f32.f32 {%f807, %f808, %f809, %f810}, [%rd97, {%f765, %f794}];
mul.f32 %f811, %f747, %f799;
fma.rn.f32 %f812, %f746, %f795, %f811;
fma.rn.f32 %f813, %f748, %f803, %f812;
fma.rn.f32 %f814, %f749, %f807, %f813;
add.f32 %f815, %f3, 0f40000000;
tex.2d.v4.f32.f32 {%f816, %f817, %f818, %f819}, [%rd97, {%f751, %f815}];
tex.2d.v4.f32.f32 {%f820, %f821, %f822, %f823}, [%rd97, {%f2, %f815}];
tex.2d.v4.f32.f32 {%f824, %f825, %f826, %f827}, [%rd97, {%f760, %f815}];
tex.2d.v4.f32.f32 {%f828, %f829, %f830, %f831}, [%rd97, {%f765, %f815}];
add.f32 %f832, %f913, %f926;
add.f32 %f833, %f832, %f939;
add.f32 %f834, %f833, %f952;
div.rn.f32 %f835, %f913, %f834;
div.rn.f32 %f836, %f926, %f834;
div.rn.f32 %f837, %f939, %f834;
div.rn.f32 %f838, %f952, %f834;
mul.f32 %f839, %f747, %f820;
fma.rn.f32 %f840, %f746, %f816, %f839;
fma.rn.f32 %f841, %f748, %f824, %f840;
fma.rn.f32 %f842, %f749, %f828, %f841;
mul.f32 %f843, %f836, %f793;
fma.rn.f32 %f844, %f835, %f773, %f843;
fma.rn.f32 %f845, %f837, %f814, %f844;
fma.rn.f32 %f846, %f838, %f842, %f845;
setp.gt.s32 %p220, %r567, 8;
selp.f32 %f847, 0f477FFF00, 0f437F0000, %p220;
mul.f32 %f848, %f847, %f846;
cvt.rzi.u32.f32 %r1310, %f848;
mad.lo.s32 %r1319, %r2, %r564, %r1;
cvta.to.global.u64 %rd144, %rd98;
cvt.s64.s32 %rd145, %r1319;
add.s64 %rd146, %rd144, %rd145;
st.global.u8 [%rd146], %r1310;
BB6_386:
ret;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment