Skip to content

Instantly share code, notes, and snippets.

@ericcano
Created July 13, 2021 14:44
Show Gist options
  • Save ericcano/c7da516615108634b48ac51b78bf4a39 to your computer and use it in GitHub Desktop.
Save ericcano/c7da516615108634b48ac51b78bf4a39 to your computer and use it in GitHub Desktop.
PTX result (excerpt for compilation result of ea7ec59cbe597bca22e89fa59d4d35fbfea39388 of https://github.com/ericcano/soa)
// -----------------------------------------------------------------------------
// Kernel entry point indirectCrossProductSoA (name shown demangled on the
// .entry line; the parameter symbols below keep the mangled form).
//
// Parameters
//   param_0, param_1, param_2 : three 80-byte, 8-byte-aligned by-value blobs
//                               (testSoA::SoA according to the demangled
//                               signature).
//   param_3                   : u64 upper bound for the per-thread index.
//
// Only four 8-byte fields of each blob are read in this excerpt:
//   +0           : an address; converted with cvta.to.global and used as base.
//   +24/+32/+40  : 64-bit values that are ADDED to that base, i.e. they are
//                  used as byte offsets, not as standalone pointers.
// NOTE(review): these three offsets are presumably the x/y/z columns of the
// SoA -- confirm against the SoA definition; below they are called c24/c32/c40.
//
// Per-thread effect, for i = thread index and i < param_3, with
//   col(S, k)[i] := *(f64 *)(global(S+0) + S[+k] + 8*i)
// and out = param_0, a = param_1, b = param_2:
//   col(out,24)[i] = col(a,32)[i]*col(b,40)[i] - col(a,40)[i]*col(b,32)[i]
//   col(out,32)[i] = col(a,40)[i]*col(b,24)[i] - col(a,24)[i]*col(b,40)[i]
//   col(out,40)[i] = col(a,24)[i]*col(b,32)[i] - col(a,32)[i]*col(b,24)[i]
// Each product and each difference is a separate mul.f64 / sub.f64 (no fma).
// -----------------------------------------------------------------------------
.entry <unnamed>::indirectCrossProductSoA(testSoA::SoA, testSoA::SoA, testSoA::SoA, unsigned long)(
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0[80],
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1[80],
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2[80],
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3
)
{
// Virtual register banks: predicates, 32-bit integers, doubles, 64-bit integers.
.reg .pred %p<2>;
.reg .b32 %r<5>;
.reg .f64 %fd<22>;
.reg .b64 %rd<55>;
$L__func_begin3:
// ---- Load kernel arguments --------------------------------------------------
// %rd32 = param_3 (element-count bound).
ld.param.u64 %rd32, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3];
// param_2 (b): %rd27 = b[+40], %rd26 = b[+32], %rd25 = b[+24], %rd22 = b[+0] (base).
ld.param.u64 %rd27, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+40];
ld.param.u64 %rd26, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+32];
ld.param.u64 %rd25, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+24];
ld.param.u64 %rd22, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2];
// param_1 (a): %rd17 = a[+40], %rd16 = a[+32], %rd15 = a[+24], %rd12 = a[+0] (base).
ld.param.u64 %rd17, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+40];
ld.param.u64 %rd16, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+32];
ld.param.u64 %rd15, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+24];
ld.param.u64 %rd12, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1];
// param_0 (out): %rd7 = out[+40], %rd6 = out[+32], %rd5 = out[+24], %rd2 = out[+0] (base).
ld.param.u64 %rd7, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+40];
ld.param.u64 %rd6, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+32];
ld.param.u64 %rd5, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+24];
ld.param.u64 %rd2, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0];
// ---- Thread index and bounds check ------------------------------------------
// %r4 = ctaid.x * ntid.x + tid.x, keeping only the low 32 bits (mad.lo),
// then zero-extended into %rd1.
// NOTE(review): the index therefore wraps modulo 2^32 before the 64-bit compare.
mov.u32 %r1, %ntid.x;
mov.u32 %r2, %ctaid.x;
mov.u32 %r3, %tid.x;
mad.lo.s32 %r4, %r2, %r1, %r3;
cvt.u64.u32 %rd1, %r4;
// Unsigned compare: threads with index >= param_3 branch straight to ret.
setp.ge.u64 %p1, %rd1, %rd32;
@%p1 bra $L__BB3_2;
// ---- Convert the three base addresses from generic to global space ----------
// %rd33 = out base, %rd34 = a base, %rd35 = b base.
cvta.to.global.u64 %rd33, %rd2;
cvta.to.global.u64 %rd34, %rd12;
cvta.to.global.u64 %rd35, %rd22;
// %rd37 = index * 8: byte displacement of this thread's f64 element.
// %rd38 = &col(a,32)[i]
add.s64 %rd36, %rd34, %rd16;
shl.b64 %rd37, %rd1, 3;
add.s64 %rd38, %rd36, %rd37;
// %rd40 = &col(b,40)[i]
add.s64 %rd39, %rd35, %rd27;
add.s64 %rd40, %rd39, %rd37;
// ---- First output: col(out,24)[i] = a32*b40 - a40*b32 ------------------------
ld.global.f64 %fd1, [%rd40];
ld.global.f64 %fd2, [%rd38];
mul.f64 %fd3, %fd2, %fd1;
// %rd42 = &col(a,40)[i]
add.s64 %rd41, %rd34, %rd17;
add.s64 %rd42, %rd41, %rd37;
// %rd44 = &col(b,32)[i]
add.s64 %rd43, %rd35, %rd26;
add.s64 %rd44, %rd43, %rd37;
ld.global.f64 %fd4, [%rd44];
ld.global.f64 %fd5, [%rd42];
mul.f64 %fd6, %fd5, %fd4;
sub.f64 %fd7, %fd3, %fd6;
// %rd46 = &col(out,24)[i]
add.s64 %rd45, %rd33, %rd5;
add.s64 %rd46, %rd45, %rd37;
st.global.f64 [%rd46], %fd7;
// ---- Second output: col(out,32)[i] = a40*b24 - a24*b40 -----------------------
// Inputs already read above (%rd42, %rd40) are loaded from memory again after
// the store instead of reusing %fd5 / %fd1.
// NOTE(review): presumably because the compiler cannot rule out that the
// output buffer aliases the inputs -- confirm before treating as redundant.
// %rd48 = &col(b,24)[i]
add.s64 %rd47, %rd35, %rd25;
add.s64 %rd48, %rd47, %rd37;
ld.global.f64 %fd8, [%rd48];
ld.global.f64 %fd9, [%rd42];
mul.f64 %fd10, %fd9, %fd8;
// %rd50 = &col(a,24)[i]
add.s64 %rd49, %rd34, %rd15;
add.s64 %rd50, %rd49, %rd37;
ld.global.f64 %fd11, [%rd40];
ld.global.f64 %fd12, [%rd50];
mul.f64 %fd13, %fd12, %fd11;
sub.f64 %fd14, %fd10, %fd13;
// %rd52 = &col(out,32)[i]
add.s64 %rd51, %rd33, %rd6;
add.s64 %rd52, %rd51, %rd37;
st.global.f64 [%rd52], %fd14;
// ---- Third output: col(out,40)[i] = a24*b32 - a32*b24 ------------------------
// All four inputs are loaded from memory again after the previous store.
ld.global.f64 %fd15, [%rd44];
ld.global.f64 %fd16, [%rd50];
mul.f64 %fd17, %fd16, %fd15;
ld.global.f64 %fd18, [%rd48];
ld.global.f64 %fd19, [%rd38];
mul.f64 %fd20, %fd19, %fd18;
sub.f64 %fd21, %fd17, %fd20;
// %rd54 = &col(out,40)[i]
add.s64 %rd53, %rd33, %rd7;
add.s64 %rd54, %rd53, %rd37;
st.global.f64 [%rd54], %fd21;
// Common exit: reached by fall-through and by out-of-range threads.
$L__BB3_2:
ret;
$L__func_end3:
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment