Last active
July 12, 2021 09:45
-
-
Save ericcano/6a3265af706c7607e0448b609aa92cb9 to your computer and use it in GitHub Desktop.
PTX result (excerpt of the compilation result of cc0114e4336d9bae130e9c2875b68ffd85a64823 of https://github.com/ericcano/soa)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Kernel entry indirectCrossProductSoA (the entry name is shown in demangled form).
// Parameters: three 80-byte, 8-byte-aligned by-value aggregates (param_0..param_2,
// demangled type testSoA::SoA) and one u64 (param_3).
// From each aggregate only the three 64-bit words at byte offsets 24, 32 and 40 are
// read; each is used as the base address of an array of f64 values.
// Per thread, with i = ctaid.x * ntid.x + tid.x, R = param_0, A = param_1, B = param_2:
//   if i >= param_3 the thread returns without touching memory, otherwise
//   R.off24[i] = A.off32[i] * B.off40[i] - A.off40[i] * B.off32[i]
//   R.off32[i] = A.off40[i] * B.off24[i] - A.off24[i] * B.off40[i]
//   R.off40[i] = A.off24[i] * B.off32[i] - A.off32[i] * B.off24[i]
// NOTE(review): presumably offsets 24/32/40 hold the x/y/z column pointers of the
// SoA, which makes the above the 3D cross product R = A x B -- confirm against the
// struct layout.
.entry <unnamed>::indirectCrossProductSoA(testSoA::SoA, testSoA::SoA, testSoA::SoA, unsigned long)( | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0[80], | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1[80], | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2[80], | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3 | |
) | |
{ | |
// Virtual register declarations: predicates, 32-bit, f64 and 64-bit registers.
.reg .pred %p<2>; | |
.reg .b32 %r<5>; | |
.reg .f64 %fd<22>; | |
.reg .b64 %rd<52>; | |
$L__func_begin3: | |
// Load the index bound (param_3) and the nine pointers stored at byte offsets
// 40/32/24 of param_2, param_1 and param_0.
ld.param.u64 %rd32, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3]; | |
ld.param.u64 %rd27, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+40]; | |
ld.param.u64 %rd26, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+32]; | |
ld.param.u64 %rd25, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+24]; | |
ld.param.u64 %rd17, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+40]; | |
ld.param.u64 %rd16, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+32]; | |
ld.param.u64 %rd15, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+24]; | |
ld.param.u64 %rd7, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+40]; | |
ld.param.u64 %rd6, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+32]; | |
ld.param.u64 %rd5, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+24]; | |
// Thread index i = ctaid.x * ntid.x + tid.x. The multiply-add keeps only the low
// 32 bits (mad.lo) and the result is then zero-extended to 64 bits, so an index
// that does not fit in 32 bits wraps before the bounds check.
mov.u32 %r1, %ctaid.x; | |
mov.u32 %r2, %ntid.x; | |
mov.u32 %r3, %tid.x; | |
mad.lo.s32 %r4, %r1, %r2, %r3; | |
cvt.u64.u32 %rd1, %r4; | |
// Unsigned bounds check: threads with i >= param_3 branch straight to ret.
setp.ge.u64 %p1, %rd1, %rd32; | |
@%p1 bra $L__BB3_2; | |
// Convert the nine generic addresses to global-space addresses.
cvta.to.global.u64 %rd33, %rd27; | |
cvta.to.global.u64 %rd34, %rd26; | |
cvta.to.global.u64 %rd35, %rd25; | |
cvta.to.global.u64 %rd36, %rd17; | |
cvta.to.global.u64 %rd37, %rd16; | |
cvta.to.global.u64 %rd38, %rd15; | |
cvta.to.global.u64 %rd39, %rd7; | |
cvta.to.global.u64 %rd40, %rd6; | |
cvta.to.global.u64 %rd41, %rd5; | |
// Byte offset of element i (i * 8), added to every base address:
//   %rd43/%rd44/%rd45 = &R.off24[i] / &R.off32[i] / &R.off40[i]
//   %rd46/%rd47/%rd48 = &A.off24[i] / &A.off32[i] / &A.off40[i]
//   %rd49/%rd50/%rd51 = &B.off24[i] / &B.off32[i] / &B.off40[i]
shl.b64 %rd42, %rd1, 3; | |
add.s64 %rd43, %rd41, %rd42; | |
add.s64 %rd44, %rd40, %rd42; | |
add.s64 %rd45, %rd39, %rd42; | |
add.s64 %rd46, %rd38, %rd42; | |
add.s64 %rd47, %rd37, %rd42; | |
add.s64 %rd48, %rd36, %rd42; | |
add.s64 %rd49, %rd35, %rd42; | |
add.s64 %rd50, %rd34, %rd42; | |
add.s64 %rd51, %rd33, %rd42; | |
// R.off24[i] = A.off32[i] * B.off40[i] - A.off40[i] * B.off32[i]
ld.global.f64 %fd1, [%rd51]; | |
ld.global.f64 %fd2, [%rd47]; | |
mul.f64 %fd3, %fd2, %fd1; | |
ld.global.f64 %fd4, [%rd50]; | |
ld.global.f64 %fd5, [%rd48]; | |
mul.f64 %fd6, %fd5, %fd4; | |
sub.f64 %fd7, %fd3, %fd6; | |
st.global.f64 [%rd43], %fd7; | |
// R.off32[i] = A.off40[i] * B.off24[i] - A.off24[i] * B.off40[i]
// [%rd48] and [%rd51] were already read above and are loaded again after the store.
// NOTE(review): presumably the reloads remain because the compiler could not rule
// out that the output arrays alias the inputs -- confirm against the source.
ld.global.f64 %fd8, [%rd49]; | |
ld.global.f64 %fd9, [%rd48]; | |
mul.f64 %fd10, %fd9, %fd8; | |
ld.global.f64 %fd11, [%rd51]; | |
ld.global.f64 %fd12, [%rd46]; | |
mul.f64 %fd13, %fd12, %fd11; | |
sub.f64 %fd14, %fd10, %fd13; | |
st.global.f64 [%rd44], %fd14; | |
// R.off40[i] = A.off24[i] * B.off32[i] - A.off32[i] * B.off24[i]
// All four operands are re-read from memory here as well.
ld.global.f64 %fd15, [%rd50]; | |
ld.global.f64 %fd16, [%rd46]; | |
mul.f64 %fd17, %fd16, %fd15; | |
ld.global.f64 %fd18, [%rd49]; | |
ld.global.f64 %fd19, [%rd47]; | |
mul.f64 %fd20, %fd19, %fd18; | |
sub.f64 %fd21, %fd17, %fd20; | |
st.global.f64 [%rd45], %fd21; | |
// Common exit, also the target of the out-of-range branch.
$L__BB3_2: | |
ret; | |
$L__func_end3: | |
} | |
// Kernel entry eigenCrossProductSoA (the entry name is shown in demangled form:
// double *, const double *, const double *, unsigned long, unsigned long).
// As used by the code below: param_0 = output base R, param_1 = input base A,
// param_2 = input base B, param_3 = bound for the thread index, param_4 = s, the
// distance in f64 elements between the three values a thread reads or writes.
// Per thread, with i = ctaid.x * ntid.x + tid.x:
//   if i >= param_3 the thread returns without touching memory, otherwise
//   R[i]      = A[i+s]  * B[i+2s] - A[i+2s] * B[i+s]
//   R[i+s]    = A[i+2s] * B[i]    - A[i]    * B[i+2s]
//   R[i+2s]   = A[i]    * B[i+s]  - A[i+s]  * B[i]
// Each of the six inputs is loaded exactly once, and all loads precede the three stores.
// NOTE(review): presumably s is the column stride of a 3-row SoA layout, making this
// the 3D cross product R = A x B -- confirm against the caller.
.entry <unnamed>::eigenCrossProductSoA(double *, const double *, const double *, unsigned long, unsigned long)( | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_0, | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_1, | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_2, | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_3, | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_4 | |
) | |
{ | |
// Virtual register declarations: predicates, 32-bit, f64 and 64-bit registers.
.reg .pred %p<2>; | |
.reg .b32 %r<5>; | |
.reg .f64 %fd<16>; | |
.reg .b64 %rd<24>; | |
$L__func_begin6: | |
// %rd2 = R, %rd3 = A, %rd4 = B, %rd6 = index bound (param_3), %rd5 = s (param_4).
ld.param.u64 %rd2, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_0]; | |
ld.param.u64 %rd3, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_1]; | |
ld.param.u64 %rd4, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_2]; | |
ld.param.u64 %rd6, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_3]; | |
ld.param.u64 %rd5, [_ZN71_GLOBAL__N__47_tmpxft_00000638_00000000_7_test_v7_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_4]; | |
// Thread index i = ctaid.x * ntid.x + tid.x. The multiply-add keeps only the low
// 32 bits (mad.lo) and the result is then zero-extended to 64 bits, so an index
// that does not fit in 32 bits wraps before the bounds check.
mov.u32 %r1, %ntid.x; | |
mov.u32 %r2, %ctaid.x; | |
mov.u32 %r3, %tid.x; | |
mad.lo.s32 %r4, %r2, %r1, %r3; | |
cvt.u64.u32 %rd1, %r4; | |
// Unsigned bounds check: threads with i >= param_3 branch straight to ret.
setp.ge.u64 %p1, %rd1, %rd6; | |
@%p1 bra $L__BB6_2; | |
// %rd8 = i * 8 (byte offset of element i); %rd9 = &A[i]; %rd11 = &R[i].
cvta.to.global.u64 %rd7, %rd3; | |
shl.b64 %rd8, %rd1, 3; | |
add.s64 %rd9, %rd7, %rd8; | |
cvta.to.global.u64 %rd10, %rd2; | |
add.s64 %rd11, %rd10, %rd8; | |
// %rd12 = s * 8 (stride in bytes); %rd13 = &A[i+s].
shl.b64 %rd12, %rd5, 3; | |
add.s64 %rd13, %rd9, %rd12; | |
// %rd15 = i + 2s, so %rd18 = &B[i+2s].
add.s64 %rd14, %rd1, %rd5; | |
add.s64 %rd15, %rd14, %rd5; | |
cvta.to.global.u64 %rd16, %rd4; | |
shl.b64 %rd17, %rd15, 3; | |
add.s64 %rd18, %rd16, %rd17; | |
// %fd7 = A[i+s] * B[i+2s] - A[i+2s] * B[i+s]   (value later stored to R[i])
ld.global.f64 %fd1, [%rd18]; | |
ld.global.f64 %fd2, [%rd13]; | |
mul.f64 %fd3, %fd2, %fd1; | |
// %rd19 = &A[i+2s]; %rd20 = &B[i+s] (stepping back one stride from &B[i+2s]).
add.s64 %rd19, %rd13, %rd12; | |
sub.s64 %rd20, %rd18, %rd12; | |
ld.global.f64 %fd4, [%rd20]; | |
ld.global.f64 %fd5, [%rd19]; | |
mul.f64 %fd6, %fd5, %fd4; | |
sub.f64 %fd7, %fd3, %fd6; | |
// %rd21 = &B[i]; %fd12 = A[i+2s] * B[i] - A[i] * B[i+2s]   (later stored to R[i+s])
sub.s64 %rd21, %rd20, %rd12; | |
ld.global.f64 %fd8, [%rd21]; | |
mul.f64 %fd9, %fd5, %fd8; | |
ld.global.f64 %fd10, [%rd9]; | |
mul.f64 %fd11, %fd10, %fd1; | |
sub.f64 %fd12, %fd9, %fd11; | |
// %fd15 = A[i] * B[i+s] - A[i+s] * B[i]   (later stored to R[i+2s]); reuses the
// values already held in registers instead of loading again.
mul.f64 %fd13, %fd10, %fd4; | |
mul.f64 %fd14, %fd2, %fd8; | |
sub.f64 %fd15, %fd13, %fd14; | |
// Write the three results to R[i], R[i+s] and R[i+2s].
st.global.f64 [%rd11], %fd7; | |
add.s64 %rd22, %rd11, %rd12; | |
st.global.f64 [%rd22], %fd12; | |
add.s64 %rd23, %rd22, %rd12; | |
st.global.f64 [%rd23], %fd15; | |
// Common exit, also the target of the out-of-range branch.
$L__BB6_2: | |
ret; | |
$L__func_end6: | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This output is taken from the Makefile-generated file
.tmp/test_v7_cuda.cu.ptx
with extra filtering applied: