Last active
July 15, 2021 15:36
-
-
Save ericcano/26cca8a25e7d6d04b75956db244a2690 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Kernel eigenCrossProductSoA. Review shorthand used in the comments below:
//   out = param_0, a = param_1, b = param_2, n = param_3, stride = param_4.
// Each thread takes one index i = %ctaid.x * %ntid.x + %tid.x and, when i < n, stores
//   out[i]            = a[i+stride]   * b[i+2*stride] - a[i+2*stride] * b[i+stride]
//   out[i+stride]     = a[i+2*stride] * b[i]          - a[i]          * b[i+2*stride]
//   out[i+2*stride]   = a[i]          * b[i+stride]   - a[i+stride]   * b[i]
// i.e. the three components of a x b, with component k located k*stride elements after
// component 0. Elements are f64 (8 bytes), which is why indices are shifted left by 3.
.entry <unnamed>::eigenCrossProductSoA(double *, const double *, const double *, unsigned long, unsigned long)( | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_0, | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_1, | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_2, | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_3, | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_4 | |
) | |
{ | |
.reg .pred %p<2>; | |
.reg .b32 %r<5>; | |
.reg .f64 %fd<16>; | |
.reg .b64 %rd<24>; | |
$L__func_begin6: | |
// Fetch the kernel arguments: rd2 = out, rd3 = a, rd4 = b, rd6 = n, rd5 = stride.
ld.param.u64 %rd2, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_0]; | |
ld.param.u64 %rd3, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_1]; | |
ld.param.u64 %rd4, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_2]; | |
ld.param.u64 %rd6, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_3]; | |
ld.param.u64 %rd5, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe20eigenCrossProductSoAEPdPKdS2_mm_param_4]; | |
// rd1 = i = %ctaid.x * %ntid.x + %tid.x.
// NOTE(review): the multiply-add is 32-bit and the result is zero-extended afterwards,
// so i wraps modulo 2^32 before it is compared against the 64-bit n.
mov.u32 %r1, %ntid.x; | |
mov.u32 %r2, %ctaid.x; | |
mov.u32 %r3, %tid.x; | |
mad.lo.s32 %r4, %r2, %r1, %r3; | |
cvt.u64.u32 %rd1, %r4; | |
// Bounds guard (unsigned): threads with i >= n skip straight to the return.
setp.ge.u64 %p1, %rd1, %rd6; | |
@%p1 bra $L__BB6_2; | |
// rd9 = &a[i], rd11 = &out[i], rd12 = stride in bytes (stride * 8).
cvta.to.global.u64 %rd7, %rd3; | |
shl.b64 %rd8, %rd1, 3; | |
add.s64 %rd9, %rd7, %rd8; | |
cvta.to.global.u64 %rd10, %rd2; | |
add.s64 %rd11, %rd10, %rd8; | |
shl.b64 %rd12, %rd5, 3; | |
// rd13 = &a[i+stride]; rd18 = &b[i+2*stride] (index i+2*stride is built first, then scaled).
add.s64 %rd13, %rd9, %rd12; | |
add.s64 %rd14, %rd1, %rd5; | |
add.s64 %rd15, %rd14, %rd5; | |
cvta.to.global.u64 %rd16, %rd4; | |
shl.b64 %rd17, %rd15, 3; | |
add.s64 %rd18, %rd16, %rd17; | |
// Inputs are read with the .nc (non-coherent) qualifier on every load in this kernel.
// fd1 = b2, fd2 = a1, fd3 = a1*b2.
ld.global.nc.f64 %fd1, [%rd18]; | |
ld.global.nc.f64 %fd2, [%rd13]; | |
mul.f64 %fd3, %fd2, %fd1; | |
// rd19 = &a[i+2*stride]; rd20 = &b[i+stride], reached by stepping back one stride from b2.
add.s64 %rd19, %rd13, %rd12; | |
sub.s64 %rd20, %rd18, %rd12; | |
// fd4 = b1, fd5 = a2; fd7 = a1*b2 - a2*b1 (result component 0).
ld.global.nc.f64 %fd4, [%rd20]; | |
ld.global.nc.f64 %fd5, [%rd19]; | |
mul.f64 %fd6, %fd5, %fd4; | |
sub.f64 %fd7, %fd3, %fd6; | |
// rd21 = &b[i]; fd8 = b0, fd10 = a0; fd12 = a2*b0 - a0*b2 (result component 1).
sub.s64 %rd21, %rd20, %rd12; | |
ld.global.nc.f64 %fd8, [%rd21]; | |
mul.f64 %fd9, %fd5, %fd8; | |
ld.global.nc.f64 %fd10, [%rd9]; | |
mul.f64 %fd11, %fd10, %fd1; | |
sub.f64 %fd12, %fd9, %fd11; | |
// fd15 = a0*b1 - a1*b0 (result component 2).
mul.f64 %fd13, %fd10, %fd4; | |
mul.f64 %fd14, %fd2, %fd8; | |
sub.f64 %fd15, %fd13, %fd14; | |
// Store the three result components at out[i], out[i+stride], out[i+2*stride].
st.global.f64 [%rd11], %fd7; | |
add.s64 %rd22, %rd11, %rd12; | |
st.global.f64 [%rd22], %fd12; | |
add.s64 %rd23, %rd22, %rd12; | |
st.global.f64 [%rd23], %fd15; | |
// Common exit, also the target of the bounds guard.
$L__BB6_2: | |
ret; | |
$L__func_end6: | |
} | |
// Kernel embeddedCrossProductSoA. The single argument is a 128-byte, 8-byte-aligned
// by-value blob (testSoA::SoA per the header); its declaration is not visible here, so
// the field roles below are inferred purely from how this code uses each 8-byte slot:
//   +8          : element count used for the bounds check
//   +48 / +56   : base pointer / component stride (in f64 elements) of first operand "a"
//   +64 / +72   : base pointer / component stride of second operand "b"
//   +80 / +88   : base pointer / component stride of the destination "r"
// Each in-range thread i computes r = a x b, where component k of an operand lives at
// base[i + k*stride].
.entry <unnamed>::embeddedCrossProductSoA(testSoA::SoA)( | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23embeddedCrossProductSoAEN7testSoA3SoAE_param_0[128] | |
) | |
{ | |
.reg .pred %p<2>; | |
.reg .b32 %r<5>; | |
.reg .f64 %fd<16>; | |
.reg .b64 %rd<36>; | |
$L__func_begin7: | |
// rd13 = r stride, rd12 = r base, rd11 = b stride, rd10 = b base,
// rd9 = a stride, rd8 = a base, rd3 = element count.
ld.param.u64 %rd13, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23embeddedCrossProductSoAEN7testSoA3SoAE_param_0+88]; | |
ld.param.u64 %rd12, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23embeddedCrossProductSoAEN7testSoA3SoAE_param_0+80]; | |
ld.param.u64 %rd11, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23embeddedCrossProductSoAEN7testSoA3SoAE_param_0+72]; | |
ld.param.u64 %rd10, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23embeddedCrossProductSoAEN7testSoA3SoAE_param_0+64]; | |
ld.param.u64 %rd9, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23embeddedCrossProductSoAEN7testSoA3SoAE_param_0+56]; | |
ld.param.u64 %rd8, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23embeddedCrossProductSoAEN7testSoA3SoAE_param_0+48]; | |
ld.param.u64 %rd3, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23embeddedCrossProductSoAEN7testSoA3SoAE_param_0+8]; | |
// rd1 = i = %ctaid.x * %ntid.x + %tid.x (32-bit multiply-add, then zero-extended to 64 bits).
mov.u32 %r1, %ntid.x; | |
mov.u32 %r2, %ctaid.x; | |
mov.u32 %r3, %tid.x; | |
mad.lo.s32 %r4, %r2, %r1, %r3; | |
cvt.u64.u32 %rd1, %r4; | |
// Bounds guard (unsigned): exit when count <= i.
setp.le.u64 %p1, %rd3, %rd1; | |
@%p1 bra $L__BB7_2; | |
// rd22 = &a[i], rd23 = &b[i], rd24 = &r[i].
cvta.to.global.u64 %rd18, %rd12; | |
cvta.to.global.u64 %rd19, %rd10; | |
cvta.to.global.u64 %rd20, %rd8; | |
shl.b64 %rd21, %rd1, 3; | |
add.s64 %rd22, %rd20, %rd21; | |
add.s64 %rd23, %rd19, %rd21; | |
add.s64 %rd24, %rd18, %rd21; | |
// rd25 = a stride in bytes; rd26 = address of a component 1.
shl.b64 %rd25, %rd9, 3; | |
add.s64 %rd26, %rd22, %rd25; | |
// Shift by 4 = b stride * 16 bytes, i.e. two strides: rd28 = address of b component 2.
shl.b64 %rd27, %rd11, 4; | |
add.s64 %rd28, %rd23, %rd27; | |
// Plain ld.global here (no .nc qualifier). fd1 = b2, fd2 = a1, fd3 = a1*b2.
ld.global.f64 %fd1, [%rd28]; | |
ld.global.f64 %fd2, [%rd26]; | |
mul.f64 %fd3, %fd2, %fd1; | |
// rd29 = address of a component 2; rd31 = address of b component 1 (one b stride back from b2).
add.s64 %rd29, %rd26, %rd25; | |
shl.b64 %rd30, %rd11, 3; | |
sub.s64 %rd31, %rd28, %rd30; | |
// fd4 = b1, fd5 = a2; fd7 = a1*b2 - a2*b1 (result component 0).
ld.global.f64 %fd4, [%rd31]; | |
ld.global.f64 %fd5, [%rd29]; | |
mul.f64 %fd6, %fd5, %fd4; | |
sub.f64 %fd7, %fd3, %fd6; | |
// rd32 = &b[i]; fd8 = b0, fd10 = a0; fd12 = a2*b0 - a0*b2 (result component 1).
sub.s64 %rd32, %rd31, %rd30; | |
ld.global.f64 %fd8, [%rd32]; | |
mul.f64 %fd9, %fd5, %fd8; | |
ld.global.f64 %fd10, [%rd22]; | |
mul.f64 %fd11, %fd10, %fd1; | |
sub.f64 %fd12, %fd9, %fd11; | |
// fd15 = a0*b1 - a1*b0 (result component 2).
mul.f64 %fd13, %fd10, %fd4; | |
mul.f64 %fd14, %fd2, %fd8; | |
sub.f64 %fd15, %fd13, %fd14; | |
// Store the result components at r[i], r[i + rstride], r[i + 2*rstride] (rd33 = r stride in bytes).
st.global.f64 [%rd24], %fd7; | |
shl.b64 %rd33, %rd13, 3; | |
add.s64 %rd34, %rd24, %rd33; | |
st.global.f64 [%rd34], %fd12; | |
add.s64 %rd35, %rd34, %rd33; | |
st.global.f64 [%rd35], %fd15; | |
// Common exit, also the target of the bounds guard.
$L__BB7_2: | |
ret; | |
$L__func_end7: | |
} | |
// Kernel embeddedCrossProductLocalObjectSoA. Review shorthand: buf = param_0 (byte buffer),
// n = param_1 (element count).
// All addresses are derived from buf and n alone. The buffer is addressed as consecutive
// f64 columns whose pitch in bytes is P = (n*8 + 127) & -128, i.e. n*8 rounded up to a
// multiple of 128. With column c starting at buf + c*P, each in-range thread i reads
// columns 3..5 (operand "a") and 6..8 (operand "b") at element i and writes a x b into
// columns 9..11. Columns 0..2 are not touched by this kernel.
.entry <unnamed>::embeddedCrossProductLocalObjectSoA(std::byte *, unsigned long)( | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003688_00000000_7_test_v9_cuda_cpp1_ii_e64742fe34embeddedCrossProductLocalObjectSoAEPSt4bytem_param_0, | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003688_00000000_7_test_v9_cuda_cpp1_ii_e64742fe34embeddedCrossProductLocalObjectSoAEPSt4bytem_param_1 | |
) | |
{ | |
.reg .pred %p<2>; | |
.reg .b32 %r<5>; | |
.reg .f64 %fd<16>; | |
.reg .b64 %rd<25>; | |
$L__func_begin8: | |
// rd2 = buf, rd3 = n.
ld.param.u64 %rd2, [_ZN71_GLOBAL__N__47_tmpxft_00003688_00000000_7_test_v9_cuda_cpp1_ii_e64742fe34embeddedCrossProductLocalObjectSoAEPSt4bytem_param_0]; | |
ld.param.u64 %rd3, [_ZN71_GLOBAL__N__47_tmpxft_00003688_00000000_7_test_v9_cuda_cpp1_ii_e64742fe34embeddedCrossProductLocalObjectSoAEPSt4bytem_param_1]; | |
// rd1 = i = %ctaid.x * %ntid.x + %tid.x (32-bit multiply-add, then zero-extended to 64 bits).
mov.u32 %r1, %ntid.x; | |
mov.u32 %r2, %ctaid.x; | |
mov.u32 %r3, %tid.x; | |
mad.lo.s32 %r4, %r2, %r1, %r3; | |
cvt.u64.u32 %rd1, %r4; | |
// Bounds guard (unsigned): exit when i >= n.
setp.ge.u64 %p1, %rd1, %rd3; | |
@%p1 bra $L__BB8_2; | |
// rd7 = P, the column pitch in bytes: n*8 rounded up to the next multiple of 128.
cvta.to.global.u64 %rd4, %rd2; | |
shl.b64 %rd5, %rd3, 3; | |
add.s64 %rd6, %rd5, 127; | |
and.b64 %rd7, %rd6, -128; | |
// rd9 = start of column 3, rd11 = start of column 6.
mul.lo.s64 %rd8, %rd7, 3; | |
add.s64 %rd9, %rd4, %rd8; | |
mul.lo.s64 %rd10, %rd7, 6; | |
add.s64 %rd11, %rd4, %rd10; | |
// rd13 = &col3[i] (a0), rd14 = &col6[i] (b0), rd15 = &col4[i] (a1).
shl.b64 %rd12, %rd1, 3; | |
add.s64 %rd13, %rd9, %rd12; | |
add.s64 %rd14, %rd11, %rd12; | |
add.s64 %rd15, %rd13, %rd7; | |
// rd17 = ((n*8 + 127) << 1) & -256, which equals 2*P; rd18 = &col8[i] (b2).
shl.b64 %rd16, %rd6, 1; | |
and.b64 %rd17, %rd16, -256; | |
add.s64 %rd18, %rd14, %rd17; | |
// fd1 = b2, fd2 = a1, fd3 = a1*b2.
ld.global.f64 %fd1, [%rd18]; | |
ld.global.f64 %fd2, [%rd15]; | |
mul.f64 %fd3, %fd2, %fd1; | |
// rd19 = &col5[i] (a2), rd20 = &col7[i] (b1); fd7 = a1*b2 - a2*b1 (result component 0).
add.s64 %rd19, %rd15, %rd7; | |
add.s64 %rd20, %rd14, %rd7; | |
ld.global.f64 %fd4, [%rd20]; | |
ld.global.f64 %fd5, [%rd19]; | |
mul.f64 %fd6, %fd5, %fd4; | |
sub.f64 %fd7, %fd3, %fd6; | |
// rd21 = &col5[i] + P = &col6[i], the same address as rd14, so fd8 = b0.
add.s64 %rd21, %rd19, %rd7; | |
ld.global.f64 %fd8, [%rd21]; | |
mul.f64 %fd9, %fd5, %fd8; | |
// fd10 = a0; fd12 = a2*b0 - a0*b2 (result component 1).
ld.global.f64 %fd10, [%rd13]; | |
mul.f64 %fd11, %fd10, %fd1; | |
sub.f64 %fd12, %fd9, %fd11; | |
// fd15 = a0*b1 - a1*b0 (result component 2).
mul.f64 %fd13, %fd10, %fd4; | |
mul.f64 %fd14, %fd2, %fd8; | |
sub.f64 %fd15, %fd13, %fd14; | |
// Results go to columns 9, 10 and 11 at element i, each one pitch past the previous.
add.s64 %rd22, %rd18, %rd7; | |
st.global.f64 [%rd22], %fd7; | |
add.s64 %rd23, %rd22, %rd7; | |
st.global.f64 [%rd23], %fd12; | |
add.s64 %rd24, %rd23, %rd7; | |
st.global.f64 [%rd24], %fd15; | |
// Common exit, also the target of the bounds guard.
$L__BB8_2: | |
ret; | |
$L__func_end8: | |
} | |
// Kernel indirectCrossProductSoA. Review shorthand: r = param_0, a = param_1, b = param_2
// (three 128-byte, 8-byte-aligned by-value blobs), n = param_3.
// From each blob only the 8-byte slots at +24, +32 and +40 are read; each is used as a
// pointer to an f64 array indexed by the thread index i. The struct declaration is not
// visible here, so calling these slots "components 0, 1, 2" is inferred from the arithmetic:
//   r[+24][i] = a[+32][i] * b[+40][i] - a[+40][i] * b[+32][i]
//   r[+32][i] = a[+40][i] * b[+24][i] - a[+24][i] * b[+40][i]
//   r[+40][i] = a[+24][i] * b[+32][i] - a[+32][i] * b[+24][i]
// which is the cross product a x b, computed only for threads with i < n.
.entry <unnamed>::indirectCrossProductSoA(testSoA::SoA, testSoA::SoA, testSoA::SoA, unsigned long)( | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0[128], | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1[128], | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2[128], | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3 | |
) | |
{ | |
.reg .pred %p<2>; | |
.reg .b32 %r<5>; | |
.reg .f64 %fd<16>; | |
.reg .b64 %rd<70>; | |
$L__func_begin3: | |
// rd50 = n.
ld.param.u64 %rd50, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3]; | |
// b pointers: rd39 = b[+40], rd38 = b[+32], rd37 = b[+24].
ld.param.u64 %rd39, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+40]; | |
ld.param.u64 %rd38, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+32]; | |
ld.param.u64 %rd37, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+24]; | |
// a pointers: rd23 = a[+40], rd22 = a[+32], rd21 = a[+24].
ld.param.u64 %rd23, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+40]; | |
ld.param.u64 %rd22, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+32]; | |
ld.param.u64 %rd21, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+24]; | |
// r pointers: rd7 = r[+40], rd6 = r[+32], rd5 = r[+24].
ld.param.u64 %rd7, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+40]; | |
ld.param.u64 %rd6, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+32]; | |
ld.param.u64 %rd5, [_ZN71_GLOBAL__N__47_tmpxft_00003474_00000000_7_test_v9_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+24]; | |
// rd1 = i = %ctaid.x * %ntid.x + %tid.x (32-bit multiply-add, then zero-extended to 64 bits).
mov.u32 %r1, %ctaid.x; | |
mov.u32 %r2, %ntid.x; | |
mov.u32 %r3, %tid.x; | |
mad.lo.s32 %r4, %r1, %r2, %r3; | |
cvt.u64.u32 %rd1, %r4; | |
// Bounds guard (unsigned): exit when i >= n.
setp.ge.u64 %p1, %rd1, %rd50; | |
@%p1 bra $L__BB3_2; | |
// Convert all nine pointers to global-space addresses.
cvta.to.global.u64 %rd51, %rd39; | |
cvta.to.global.u64 %rd52, %rd38; | |
cvta.to.global.u64 %rd53, %rd37; | |
cvta.to.global.u64 %rd54, %rd23; | |
cvta.to.global.u64 %rd55, %rd22; | |
cvta.to.global.u64 %rd56, %rd21; | |
cvta.to.global.u64 %rd57, %rd7; | |
cvta.to.global.u64 %rd58, %rd6; | |
cvta.to.global.u64 %rd59, %rd5; | |
// rd60 = i * 8 bytes. Element addresses:
//   rd61/rd62/rd63 = r components 0/1/2, rd64/rd65/rd66 = a components 0/1/2,
//   rd67/rd68/rd69 = b components 0/1/2.
shl.b64 %rd60, %rd1, 3; | |
add.s64 %rd61, %rd59, %rd60; | |
add.s64 %rd62, %rd58, %rd60; | |
add.s64 %rd63, %rd57, %rd60; | |
add.s64 %rd64, %rd56, %rd60; | |
add.s64 %rd65, %rd55, %rd60; | |
add.s64 %rd66, %rd54, %rd60; | |
add.s64 %rd67, %rd53, %rd60; | |
add.s64 %rd68, %rd52, %rd60; | |
add.s64 %rd69, %rd51, %rd60; | |
// Inputs are read with the .nc (non-coherent) qualifier; each store is issued as soon as
// its component is ready instead of being batched at the end.
// fd1 = b2, fd2 = a1, fd4 = b1, fd5 = a2; fd7 = a1*b2 - a2*b1 -> r component 0.
ld.global.nc.f64 %fd1, [%rd69]; | |
ld.global.nc.f64 %fd2, [%rd65]; | |
mul.f64 %fd3, %fd2, %fd1; | |
ld.global.nc.f64 %fd4, [%rd68]; | |
ld.global.nc.f64 %fd5, [%rd66]; | |
mul.f64 %fd6, %fd5, %fd4; | |
sub.f64 %fd7, %fd3, %fd6; | |
st.global.f64 [%rd61], %fd7; | |
// fd8 = b0, fd10 = a0; fd12 = a2*b0 - a0*b2 -> r component 1.
ld.global.nc.f64 %fd8, [%rd67]; | |
mul.f64 %fd9, %fd5, %fd8; | |
ld.global.nc.f64 %fd10, [%rd64]; | |
mul.f64 %fd11, %fd10, %fd1; | |
sub.f64 %fd12, %fd9, %fd11; | |
st.global.f64 [%rd62], %fd12; | |
// fd15 = a0*b1 - a1*b0 -> r component 2.
mul.f64 %fd13, %fd10, %fd4; | |
mul.f64 %fd14, %fd2, %fd8; | |
sub.f64 %fd15, %fd13, %fd14; | |
st.global.f64 [%rd63], %fd15; | |
// Common exit, also the target of the bounds guard.
$L__BB3_2: | |
ret; | |
$L__func_end3: | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment