Created
July 13, 2021 14:44
-
-
Save ericcano/c7da516615108634b48ac51b78bf4a39 to your computer and use it in GitHub Desktop.
PTX result (excerpt of the compilation result of commit ea7ec59cbe597bca22e89fa59d4d35fbfea39388 of https://github.com/ericcano/soa)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ---------------------------------------------------------------------------
// Kernel entry: indirectCrossProductSoA, in an unnamed namespace.
//
// Parameters
//   param_0, param_1, param_2 : 80-byte, 8-byte-aligned aggregates passed by
//                               value (testSoA::SoA per the signature line).
//   param_3                   : u64 bound; a thread whose index is >= this
//                               value branches straight to the exit label.
//
// Of each aggregate, only the 64-bit words at byte offsets 0, 24, 32 and 40
// are read in this kernel. Word 0 is used as a generic base address (it is
// converted with cvta.to.global); words 24/32/40 are added to that base as
// byte offsets, and the results are indexed as arrays of f64.
//
// Notation used in the comments below:
//   R = param_0 (only written), A = param_1, B = param_2 (only read)
//   c24 / c32 / c40 = the f64 arrays located via the words at +24 / +32 / +40
//   (presumably the x / y / z components -- TODO confirm against the SoA
//   definition, which is not visible here)
//
// Per thread i (i < param_3):
//   R.c24[i] = A.c32[i] * B.c40[i] - A.c40[i] * B.c32[i]
//   R.c32[i] = A.c40[i] * B.c24[i] - A.c24[i] * B.c40[i]
//   R.c40[i] = A.c24[i] * B.c32[i] - A.c32[i] * B.c24[i]
//
// NOTE(review): the trailing " | |" on every line and the demangled name on
// the .entry line are not PTX syntax; presumably artifacts of how this listing
// was rendered or captured -- confirm before feeding this text to ptxas.
// ---------------------------------------------------------------------------
.entry <unnamed>::indirectCrossProductSoA(testSoA::SoA, testSoA::SoA, testSoA::SoA, unsigned long)( | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0[80], | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1[80], | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2[80], | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3 | |
) | |
{ | |
// Virtual register pools: 1 predicate in use (%p1), 32-bit integers for the
// thread index, f64 temporaries for the products, 64-bit integers for addresses.
.reg .pred %p<2>; | |
.reg .b32 %r<5>; | |
.reg .f64 %fd<22>; | |
.reg .b64 %rd<55>; | |
$L__func_begin3: | |
// --- Load the bound and the four used words of each aggregate --------------
// %rd32 = param_3 (bound)
ld.param.u64 %rd32, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3]; | |
// B (param_2): %rd27 = word +40, %rd26 = word +32, %rd25 = word +24, %rd22 = base
ld.param.u64 %rd27, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+40]; | |
ld.param.u64 %rd26, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+32]; | |
ld.param.u64 %rd25, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+24]; | |
ld.param.u64 %rd22, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2]; | |
// A (param_1): %rd17 = word +40, %rd16 = word +32, %rd15 = word +24, %rd12 = base
ld.param.u64 %rd17, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+40]; | |
ld.param.u64 %rd16, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+32]; | |
ld.param.u64 %rd15, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+24]; | |
ld.param.u64 %rd12, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1]; | |
// R (param_0): %rd7 = word +40, %rd6 = word +32, %rd5 = word +24, %rd2 = base
ld.param.u64 %rd7, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+40]; | |
ld.param.u64 %rd6, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+32]; | |
ld.param.u64 %rd5, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+24]; | |
ld.param.u64 %rd2, [_ZN71_GLOBAL__N__47_tmpxft_00005e18_00000000_7_test_v8_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0]; | |
// --- Thread index and bounds check ------------------------------------------
// i = ctaid.x * ntid.x + tid.x, computed in 32 bits (mad.lo keeps only the low
// 32 bits of the product) and then zero-extended to 64 bits.
mov.u32 %r1, %ntid.x; | |
mov.u32 %r2, %ctaid.x; | |
mov.u32 %r3, %tid.x; | |
mad.lo.s32 %r4, %r2, %r1, %r3; | |
cvt.u64.u32 %rd1, %r4; | |
// Unsigned compare: skip all loads/stores when i >= param_3.
setp.ge.u64 %p1, %rd1, %rd32; | |
@%p1 bra $L__BB3_2; | |
// --- Address setup ------------------------------------------------------------
// Convert the three generic base addresses to global-space addresses:
// %rd33 = R base, %rd34 = A base, %rd35 = B base.
cvta.to.global.u64 %rd33, %rd2; | |
cvta.to.global.u64 %rd34, %rd12; | |
cvta.to.global.u64 %rd35, %rd22; | |
// %rd37 = i * 8, the byte offset of element i in an f64 array (reused below).
// %rd38 = &A.c32[i], %rd40 = &B.c40[i]
add.s64 %rd36, %rd34, %rd16; | |
shl.b64 %rd37, %rd1, 3; | |
add.s64 %rd38, %rd36, %rd37; | |
add.s64 %rd39, %rd35, %rd27; | |
add.s64 %rd40, %rd39, %rd37; | |
// --- First output: R.c24[i] = A.c32[i]*B.c40[i] - A.c40[i]*B.c32[i] ----------
ld.global.f64 %fd1, [%rd40]; | |
ld.global.f64 %fd2, [%rd38]; | |
mul.f64 %fd3, %fd2, %fd1; | |
// %rd42 = &A.c40[i], %rd44 = &B.c32[i]
add.s64 %rd41, %rd34, %rd17; | |
add.s64 %rd42, %rd41, %rd37; | |
add.s64 %rd43, %rd35, %rd26; | |
add.s64 %rd44, %rd43, %rd37; | |
ld.global.f64 %fd4, [%rd44]; | |
ld.global.f64 %fd5, [%rd42]; | |
mul.f64 %fd6, %fd5, %fd4; | |
sub.f64 %fd7, %fd3, %fd6; | |
// %rd46 = &R.c24[i]
add.s64 %rd45, %rd33, %rd5; | |
add.s64 %rd46, %rd45, %rd37; | |
st.global.f64 [%rd46], %fd7; | |
// --- Second output: R.c32[i] = A.c40[i]*B.c24[i] - A.c24[i]*B.c40[i] ---------
// A.c40[i] and B.c40[i] are loaded again from memory instead of reusing
// %fd5 / %fd1 (presumably because the store above could alias the inputs --
// not provable from this listing).
// %rd48 = &B.c24[i]
add.s64 %rd47, %rd35, %rd25; | |
add.s64 %rd48, %rd47, %rd37; | |
ld.global.f64 %fd8, [%rd48]; | |
ld.global.f64 %fd9, [%rd42]; | |
mul.f64 %fd10, %fd9, %fd8; | |
// %rd50 = &A.c24[i]
add.s64 %rd49, %rd34, %rd15; | |
add.s64 %rd50, %rd49, %rd37; | |
ld.global.f64 %fd11, [%rd40]; | |
ld.global.f64 %fd12, [%rd50]; | |
mul.f64 %fd13, %fd12, %fd11; | |
sub.f64 %fd14, %fd10, %fd13; | |
// %rd52 = &R.c32[i]
add.s64 %rd51, %rd33, %rd6; | |
add.s64 %rd52, %rd51, %rd37; | |
st.global.f64 [%rd52], %fd14; | |
// --- Third output: R.c40[i] = A.c24[i]*B.c32[i] - A.c32[i]*B.c24[i] ----------
// All four operands are re-read through the addresses computed earlier.
ld.global.f64 %fd15, [%rd44]; | |
ld.global.f64 %fd16, [%rd50]; | |
mul.f64 %fd17, %fd16, %fd15; | |
ld.global.f64 %fd18, [%rd48]; | |
ld.global.f64 %fd19, [%rd38]; | |
mul.f64 %fd20, %fd19, %fd18; | |
sub.f64 %fd21, %fd17, %fd20; | |
// %rd54 = &R.c40[i]
add.s64 %rd53, %rd33, %rd7; | |
add.s64 %rd54, %rd53, %rd37; | |
st.global.f64 [%rd54], %fd21; | |
// Common exit, also the target of the out-of-range branch.
$L__BB3_2: | |
ret; | |
$L__func_end3: | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment