-
-
Save ericcano/ad6586cb0619d6a42cc91606a6556855 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.entry _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m( | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0[80], | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1[80], | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2[80], | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3 | |
) | |
{ | |
.local .align 8 .b8 __local_depot5[712]; | |
.reg .b64 %SP; | |
.reg .b64 %SPL; | |
.reg .pred %p<66>; | |
.reg .b32 %r<48>; | |
.reg .f64 %fd<25>; | |
.reg .b64 %rd<312>; | |
//test_v7_cuda.cu:57 __global__ void indirectCrossProductSoA(testSoA::SoA r, const testSoA::SoA a, const testSoA::SoA b, size_t nElements) { | |
.loc 18 57 0 | |
$L__func_begin5: | |
//test_v7_cuda.cu:57 __global__ void indirectCrossProductSoA(testSoA::SoA r, const testSoA::SoA a, const testSoA::SoA b, size_t nElements) { | |
.loc 18 57 0 | |
mov.u64 %SPL, __local_depot5; | |
cvta.local.u64 %SP, %SPL; | |
ld.param.u64 %rd44, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+8]; | |
ld.param.u64 %rd45, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+16]; | |
ld.param.u64 %rd46, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+24]; | |
ld.param.u64 %rd47, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+32]; | |
ld.param.u64 %rd48, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+40]; | |
ld.param.u64 %rd49, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+48]; | |
ld.param.u64 %rd50, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+56]; | |
ld.param.u64 %rd51, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+64]; | |
ld.param.u64 %rd52, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+72]; | |
ld.param.u64 %rd34, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+8]; | |
ld.param.u64 %rd35, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+16]; | |
ld.param.u64 %rd36, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+24]; | |
ld.param.u64 %rd37, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+32]; | |
ld.param.u64 %rd38, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+40]; | |
ld.param.u64 %rd39, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+48]; | |
ld.param.u64 %rd40, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+56]; | |
ld.param.u64 %rd41, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+64]; | |
ld.param.u64 %rd42, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+72]; | |
ld.param.u64 %rd24, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+8]; | |
ld.param.u64 %rd25, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+16]; | |
ld.param.u64 %rd26, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+24]; | |
ld.param.u64 %rd27, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+32]; | |
ld.param.u64 %rd28, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+40]; | |
ld.param.u64 %rd29, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+48]; | |
ld.param.u64 %rd30, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+56]; | |
ld.param.u64 %rd31, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+64]; | |
ld.param.u64 %rd32, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+72]; | |
ld.param.u64 %rd53, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3]; | |
ld.param.u64 %rd43, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2]; | |
ld.param.u64 %rd33, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1]; | |
ld.param.u64 %rd23, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0]; | |
st.u64 [%SP+384], %rd32; | |
st.u64 [%SP+376], %rd31; | |
st.u64 [%SP+368], %rd30; | |
st.u64 [%SP+360], %rd29; | |
st.u64 [%SP+352], %rd28; | |
st.u64 [%SP+344], %rd27; | |
st.u64 [%SP+336], %rd26; | |
st.u64 [%SP+328], %rd25; | |
st.u64 [%SP+320], %rd24; | |
st.u64 [%SP+312], %rd23; | |
st.u64 [%SP+464], %rd42; | |
st.u64 [%SP+456], %rd41; | |
st.u64 [%SP+448], %rd40; | |
st.u64 [%SP+440], %rd39; | |
st.u64 [%SP+432], %rd38; | |
st.u64 [%SP+424], %rd37; | |
st.u64 [%SP+416], %rd36; | |
st.u64 [%SP+408], %rd35; | |
st.u64 [%SP+400], %rd34; | |
st.u64 [%SP+392], %rd33; | |
st.u64 [%SP+544], %rd52; | |
st.u64 [%SP+536], %rd51; | |
st.u64 [%SP+528], %rd50; | |
st.u64 [%SP+520], %rd49; | |
st.u64 [%SP+512], %rd48; | |
st.u64 [%SP+504], %rd47; | |
st.u64 [%SP+496], %rd46; | |
st.u64 [%SP+488], %rd45; | |
st.u64 [%SP+480], %rd44; | |
st.u64 [%SP+472], %rd43; | |
$L__tmp377: | |
//test_v7_cuda.cu:58 size_t i = blockIdx.x * blockDim.x + threadIdx.x; | |
.loc 18 58 14 | |
mov.u32 %r1, %ctaid.x; | |
mov.u32 %r2, %ntid.x; | |
mul.lo.s32 %r3, %r1, %r2; | |
mov.u32 %r4, %tid.x; | |
add.s32 %r5, %r3, %r4; | |
cvt.u64.u32 %rd1, %r5; | |
$L__tmp378: | |
//test_v7_cuda.cu:59 if (i >= nElements) return; | |
.loc 18 59 5 | |
setp.ge.u64 %p1, %rd1, %rd53; | |
not.pred %p2, %p1; | |
@%p2 bra $L__BB5_2; | |
bra.uni $L__BB5_1; | |
$L__BB5_1: | |
$L__tmp379: | |
//test_v7_cuda.cu:59 if (i >= nElements) return; | |
.loc 18 59 25 | |
bra.uni $L__BB5_45; | |
$L__tmp380: | |
$L__BB5_2: | |
.loc 18 0 25 | |
add.u64 %rd54, %SP, 312; | |
mov.b64 %rd55, %rd54; | |
st.u64 [%SP+256], %rd55; | |
mov.b64 %rd2, %rd1; | |
$L__tmp381: | |
//test_v7_cuda.cu:62 auto ri = r[i]; | |
.loc 18 62 15 | |
bra.uni $L__tmp382; | |
$L__tmp382: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6763 | |
ld.u64 %rd56, [%SP+256]; | |
mov.b64 %rd57, %rd56; | |
st.u64 [%SP+248], %rd57; | |
mov.b64 %rd58, %rd2; | |
$L__tmp383: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6763 | |
bra.uni $L__tmp384; | |
$L__tmp384: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd59, [%SP+248]; | |
setp.ne.s64 %p3, %rd59, 0; | |
not.pred %p4, %p3; | |
not.pred %p5, %p4; | |
@%p5 bra $L__BB5_4; | |
bra.uni $L__BB5_3; | |
$L__BB5_3: | |
mov.u32 %r6, 0; | |
mov.b32 %r7, %r6; | |
bra.uni $L__BB5_4; | |
$L__tmp385: | |
$L__BB5_4: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6782 | |
ld.u64 %rd60, [%SP+256]; | |
ld.u64 %rd61, [%rd60+24]; | |
ld.u64 %rd62, [%SP+256]; | |
ld.u64 %rd63, [%rd62+32]; | |
ld.u64 %rd64, [%SP+256]; | |
ld.u64 %rd65, [%rd64+40]; | |
ld.u64 %rd66, [%SP+256]; | |
ld.u64 %rd67, [%rd66+48]; | |
ld.u64 %rd68, [%SP+256]; | |
ld.u64 %rd69, [%rd68+56]; | |
ld.u64 %rd70, [%SP+256]; | |
ld.u64 %rd71, [%rd70+64]; | |
add.u64 %rd72, %SP, 264; | |
mov.b64 %rd73, %rd72; | |
st.u64 [%SP+232], %rd73; | |
mov.b64 %rd74, %rd2; | |
$L__tmp386: | |
.loc 11 0 6782 | |
mov.b64 %rd75, %rd61; | |
$L__tmp387: | |
mov.b64 %rd76, %rd63; | |
$L__tmp388: | |
mov.b64 %rd77, %rd65; | |
$L__tmp389: | |
mov.b64 %rd78, %rd67; | |
$L__tmp390: | |
mov.b64 %rd79, %rd69; | |
$L__tmp391: | |
mov.b64 %rd80, %rd71; | |
st.u64 [%SP+240], %rd80; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6782 | |
bra.uni $L__tmp392; | |
$L__tmp392: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd81, [%SP+232]; | |
$L__tmp393: | |
.loc 11 0 5739 | |
mov.b64 %rd82, %rd74; | |
$L__tmp394: | |
mov.b64 %rd83, %rd75; | |
$L__tmp395: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp396; | |
$L__tmp396: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd84, %rd82, 3; | |
add.s64 %rd85, %rd83, %rd84; | |
st.u64 [%rd81], %rd85; | |
$L__tmp397: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd86, [%SP+232]; | |
mov.b64 %rd87, %rd74; | |
$L__tmp398: | |
.loc 11 0 5752 | |
mov.b64 %rd88, %rd76; | |
$L__tmp399: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp400; | |
$L__tmp400: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd89, %rd87, 3; | |
add.s64 %rd90, %rd88, %rd89; | |
st.u64 [%rd86+8], %rd90; | |
$L__tmp401: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd91, [%SP+232]; | |
mov.b64 %rd92, %rd74; | |
$L__tmp402: | |
.loc 11 0 5765 | |
mov.b64 %rd93, %rd77; | |
$L__tmp403: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp404; | |
$L__tmp404: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd94, %rd92, 3; | |
add.s64 %rd95, %rd93, %rd94; | |
st.u64 [%rd91+16], %rd95; | |
$L__tmp405: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd96, [%SP+232]; | |
mov.b64 %rd97, %rd74; | |
$L__tmp406: | |
.loc 11 0 5778 | |
mov.b64 %rd98, %rd78; | |
$L__tmp407: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp408; | |
$L__tmp408: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd99, %rd97, 1; | |
add.s64 %rd100, %rd98, %rd99; | |
st.u64 [%rd96+24], %rd100; | |
$L__tmp409: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd101, [%SP+232]; | |
mov.b64 %rd102, %rd74; | |
$L__tmp410: | |
.loc 11 0 5801 | |
mov.b64 %rd103, %rd79; | |
$L__tmp411: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp412; | |
$L__tmp412: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd104, %rd102, 2; | |
add.s64 %rd105, %rd103, %rd104; | |
st.u64 [%rd101+32], %rd105; | |
$L__tmp413: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd106, [%SP+232]; | |
ld.u64 %rd107, [%SP+240]; | |
mov.b64 %rd108, %rd74; | |
$L__tmp414: | |
.loc 11 0 5822 | |
mov.b64 %rd109, %rd107; | |
st.u64 [%SP+224], %rd109; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp415; | |
$L__tmp415: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd110, [%SP+224]; | |
shl.b64 %rd111, %rd108, 3; | |
add.s64 %rd112, %rd110, %rd111; | |
st.u64 [%rd106+40], %rd112; | |
$L__tmp416: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6782 | |
ld.u64 %rd113, [%SP+256]; | |
setp.ne.s64 %p6, %rd113, 0; | |
not.pred %p7, %p6; | |
not.pred %p8, %p7; | |
@%p8 bra $L__BB5_6; | |
bra.uni $L__BB5_5; | |
$L__BB5_5: | |
mov.u32 %r8, 0; | |
mov.b32 %r9, %r8; | |
bra.uni $L__BB5_6; | |
$L__BB5_6: | |
ld.u64 %rd114, [%SP+264]; | |
ld.u64 %rd115, [%SP+272]; | |
ld.u64 %rd116, [%SP+280]; | |
ld.u64 %rd117, [%SP+288]; | |
ld.u64 %rd118, [%SP+296]; | |
ld.u64 %rd119, [%SP+304]; | |
$L__tmp417: | |
//test_v7_cuda.cu:62 auto ri = r[i]; | |
.loc 18 62 15 | |
st.u64 [%SP+704], %rd119; | |
st.u64 [%SP+696], %rd118; | |
st.u64 [%SP+688], %rd117; | |
st.u64 [%SP+680], %rd116; | |
st.u64 [%SP+672], %rd115; | |
st.u64 [%SP+664], %rd114; | |
add.u64 %rd120, %SP, 392; | |
mov.b64 %rd121, %rd120; | |
st.u64 [%SP+32], %rd121; | |
mov.b64 %rd3, %rd1; | |
$L__tmp418: | |
//test_v7_cuda.cu:63 crossProduct(ri, a[i], b[i]); | |
.loc 18 63 22 | |
bra.uni $L__tmp419; | |
$L__tmp419: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd122, [%SP+32]; | |
mov.b64 %rd123, %rd122; | |
st.u64 [%SP+24], %rd123; | |
mov.b64 %rd124, %rd3; | |
$L__tmp420: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp421; | |
$L__tmp421: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd125, [%SP+24]; | |
setp.ne.s64 %p9, %rd125, 0; | |
not.pred %p10, %p9; | |
not.pred %p11, %p10; | |
@%p11 bra $L__BB5_8; | |
bra.uni $L__BB5_7; | |
$L__BB5_7: | |
mov.u32 %r10, 0; | |
mov.b32 %r11, %r10; | |
bra.uni $L__BB5_8; | |
$L__tmp422: | |
$L__BB5_8: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd126, [%SP+32]; | |
ld.u64 %rd127, [%rd126+24]; | |
ld.u64 %rd128, [%SP+32]; | |
ld.u64 %rd129, [%rd128+32]; | |
ld.u64 %rd130, [%SP+32]; | |
ld.u64 %rd131, [%rd130+40]; | |
ld.u64 %rd132, [%SP+32]; | |
ld.u64 %rd133, [%rd132+48]; | |
ld.u64 %rd134, [%SP+32]; | |
ld.u64 %rd135, [%rd134+56]; | |
ld.u64 %rd136, [%SP+32]; | |
ld.u64 %rd137, [%rd136+64]; | |
add.u64 %rd138, %SP, 40; | |
mov.b64 %rd139, %rd138; | |
st.u64 [%SP+8], %rd139; | |
mov.b64 %rd140, %rd3; | |
$L__tmp423: | |
.loc 11 0 6996 | |
mov.b64 %rd141, %rd127; | |
$L__tmp424: | |
mov.b64 %rd142, %rd129; | |
$L__tmp425: | |
mov.b64 %rd143, %rd131; | |
$L__tmp426: | |
mov.b64 %rd144, %rd133; | |
$L__tmp427: | |
mov.b64 %rd145, %rd135; | |
$L__tmp428: | |
mov.b64 %rd146, %rd137; | |
st.u64 [%SP+16], %rd146; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp429; | |
$L__tmp429: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd147, [%SP+8]; | |
$L__tmp430: | |
.loc 11 0 5739 | |
mov.b64 %rd148, %rd140; | |
$L__tmp431: | |
mov.b64 %rd149, %rd141; | |
$L__tmp432: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp433; | |
$L__tmp433: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd150, %rd148, 3; | |
add.s64 %rd151, %rd149, %rd150; | |
st.u64 [%rd147], %rd151; | |
$L__tmp434: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd152, [%SP+8]; | |
mov.b64 %rd153, %rd140; | |
$L__tmp435: | |
.loc 11 0 5752 | |
mov.b64 %rd154, %rd142; | |
$L__tmp436: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp437; | |
$L__tmp437: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd155, %rd153, 3; | |
add.s64 %rd156, %rd154, %rd155; | |
st.u64 [%rd152+8], %rd156; | |
$L__tmp438: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd157, [%SP+8]; | |
mov.b64 %rd158, %rd140; | |
$L__tmp439: | |
.loc 11 0 5765 | |
mov.b64 %rd159, %rd143; | |
$L__tmp440: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp441; | |
$L__tmp441: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd160, %rd158, 3; | |
add.s64 %rd161, %rd159, %rd160; | |
st.u64 [%rd157+16], %rd161; | |
$L__tmp442: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd162, [%SP+8]; | |
mov.b64 %rd163, %rd140; | |
$L__tmp443: | |
.loc 11 0 5778 | |
mov.b64 %rd164, %rd144; | |
$L__tmp444: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp445; | |
$L__tmp445: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd165, %rd163, 1; | |
add.s64 %rd166, %rd164, %rd165; | |
st.u64 [%rd162+24], %rd166; | |
$L__tmp446: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd167, [%SP+8]; | |
mov.b64 %rd168, %rd140; | |
$L__tmp447: | |
.loc 11 0 5801 | |
mov.b64 %rd169, %rd145; | |
$L__tmp448: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp449; | |
$L__tmp449: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd170, %rd168, 2; | |
add.s64 %rd171, %rd169, %rd170; | |
st.u64 [%rd167+32], %rd171; | |
$L__tmp450: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd172, [%SP+8]; | |
ld.u64 %rd173, [%SP+16]; | |
mov.b64 %rd174, %rd140; | |
$L__tmp451: | |
.loc 11 0 5822 | |
mov.b64 %rd175, %rd173; | |
st.u64 [%SP+0], %rd175; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp452; | |
$L__tmp452: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd176, [%SP+0]; | |
shl.b64 %rd177, %rd174, 3; | |
add.s64 %rd178, %rd176, %rd177; | |
st.u64 [%rd172+40], %rd178; | |
$L__tmp453: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd179, [%SP+32]; | |
setp.ne.s64 %p12, %rd179, 0; | |
not.pred %p13, %p12; | |
not.pred %p14, %p13; | |
@%p14 bra $L__BB5_10; | |
bra.uni $L__BB5_9; | |
$L__BB5_9: | |
mov.u32 %r12, 0; | |
mov.b32 %r13, %r12; | |
bra.uni $L__BB5_10; | |
$L__BB5_10: | |
ld.u64 %rd180, [%SP+40]; | |
ld.u64 %rd181, [%SP+48]; | |
ld.u64 %rd182, [%SP+56]; | |
ld.u64 %rd183, [%SP+64]; | |
ld.u64 %rd184, [%SP+72]; | |
ld.u64 %rd185, [%SP+80]; | |
$L__tmp454: | |
//test_v7_cuda.cu:63 crossProduct(ri, a[i], b[i]); | |
.loc 18 63 22 | |
st.u64 [%SP+592], %rd185; | |
st.u64 [%SP+584], %rd184; | |
st.u64 [%SP+576], %rd183; | |
st.u64 [%SP+568], %rd182; | |
st.u64 [%SP+560], %rd181; | |
st.u64 [%SP+552], %rd180; | |
add.u64 %rd186, %SP, 552; | |
mov.b64 %rd187, %rd186; | |
st.u64 [%SP+600], %rd187; | |
add.u64 %rd188, %SP, 472; | |
mov.b64 %rd189, %rd188; | |
st.u64 [%SP+120], %rd189; | |
mov.b64 %rd4, %rd1; | |
$L__tmp455: | |
//test_v7_cuda.cu:63 crossProduct(ri, a[i], b[i]); | |
.loc 18 63 28 | |
bra.uni $L__tmp456; | |
$L__tmp456: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd190, [%SP+120]; | |
mov.b64 %rd191, %rd190; | |
st.u64 [%SP+112], %rd191; | |
mov.b64 %rd192, %rd4; | |
$L__tmp457: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp458; | |
$L__tmp458: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd193, [%SP+112]; | |
setp.ne.s64 %p15, %rd193, 0; | |
not.pred %p16, %p15; | |
not.pred %p17, %p16; | |
@%p17 bra $L__BB5_12; | |
bra.uni $L__BB5_11; | |
$L__BB5_11: | |
mov.u32 %r14, 0; | |
mov.b32 %r15, %r14; | |
bra.uni $L__BB5_12; | |
$L__tmp459: | |
$L__BB5_12: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd194, [%SP+120]; | |
ld.u64 %rd195, [%rd194+24]; | |
ld.u64 %rd196, [%SP+120]; | |
ld.u64 %rd197, [%rd196+32]; | |
ld.u64 %rd198, [%SP+120]; | |
ld.u64 %rd199, [%rd198+40]; | |
ld.u64 %rd200, [%SP+120]; | |
ld.u64 %rd201, [%rd200+48]; | |
ld.u64 %rd202, [%SP+120]; | |
ld.u64 %rd203, [%rd202+56]; | |
ld.u64 %rd204, [%SP+120]; | |
ld.u64 %rd205, [%rd204+64]; | |
add.u64 %rd206, %SP, 128; | |
mov.b64 %rd207, %rd206; | |
st.u64 [%SP+96], %rd207; | |
mov.b64 %rd208, %rd4; | |
$L__tmp460: | |
.loc 11 0 6996 | |
mov.b64 %rd209, %rd195; | |
$L__tmp461: | |
mov.b64 %rd210, %rd197; | |
$L__tmp462: | |
mov.b64 %rd211, %rd199; | |
$L__tmp463: | |
mov.b64 %rd212, %rd201; | |
$L__tmp464: | |
mov.b64 %rd213, %rd203; | |
$L__tmp465: | |
mov.b64 %rd214, %rd205; | |
st.u64 [%SP+104], %rd214; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp466; | |
$L__tmp466: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd215, [%SP+96]; | |
$L__tmp467: | |
.loc 11 0 5739 | |
mov.b64 %rd216, %rd208; | |
$L__tmp468: | |
mov.b64 %rd217, %rd209; | |
$L__tmp469: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp470; | |
$L__tmp470: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd218, %rd216, 3; | |
add.s64 %rd219, %rd217, %rd218; | |
st.u64 [%rd215], %rd219; | |
$L__tmp471: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd220, [%SP+96]; | |
mov.b64 %rd221, %rd208; | |
$L__tmp472: | |
.loc 11 0 5752 | |
mov.b64 %rd222, %rd210; | |
$L__tmp473: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp474; | |
$L__tmp474: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd223, %rd221, 3; | |
add.s64 %rd224, %rd222, %rd223; | |
st.u64 [%rd220+8], %rd224; | |
$L__tmp475: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd225, [%SP+96]; | |
mov.b64 %rd226, %rd208; | |
$L__tmp476: | |
.loc 11 0 5765 | |
mov.b64 %rd227, %rd211; | |
$L__tmp477: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp478; | |
$L__tmp478: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd228, %rd226, 3; | |
add.s64 %rd229, %rd227, %rd228; | |
st.u64 [%rd225+16], %rd229; | |
$L__tmp479: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd230, [%SP+96]; | |
mov.b64 %rd231, %rd208; | |
$L__tmp480: | |
.loc 11 0 5778 | |
mov.b64 %rd232, %rd212; | |
$L__tmp481: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp482; | |
$L__tmp482: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd233, %rd231, 1; | |
add.s64 %rd234, %rd232, %rd233; | |
st.u64 [%rd230+24], %rd234; | |
$L__tmp483: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd235, [%SP+96]; | |
mov.b64 %rd236, %rd208; | |
$L__tmp484: | |
.loc 11 0 5801 | |
mov.b64 %rd237, %rd213; | |
$L__tmp485: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp486; | |
$L__tmp486: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd238, %rd236, 2; | |
add.s64 %rd239, %rd237, %rd238; | |
st.u64 [%rd235+32], %rd239; | |
$L__tmp487: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd240, [%SP+96]; | |
ld.u64 %rd241, [%SP+104]; | |
mov.b64 %rd242, %rd208; | |
$L__tmp488: | |
.loc 11 0 5822 | |
mov.b64 %rd243, %rd241; | |
st.u64 [%SP+88], %rd243; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp489; | |
$L__tmp489: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd244, [%SP+88]; | |
shl.b64 %rd245, %rd242, 3; | |
add.s64 %rd246, %rd244, %rd245; | |
st.u64 [%rd240+40], %rd246; | |
$L__tmp490: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd247, [%SP+120]; | |
setp.ne.s64 %p18, %rd247, 0; | |
not.pred %p19, %p18; | |
not.pred %p20, %p19; | |
@%p20 bra $L__BB5_14; | |
bra.uni $L__BB5_13; | |
$L__BB5_13: | |
mov.u32 %r16, 0; | |
mov.b32 %r17, %r16; | |
bra.uni $L__BB5_14; | |
$L__BB5_14: | |
ld.u64 %rd248, [%SP+128]; | |
ld.u64 %rd249, [%SP+136]; | |
ld.u64 %rd250, [%SP+144]; | |
ld.u64 %rd251, [%SP+152]; | |
ld.u64 %rd252, [%SP+160]; | |
ld.u64 %rd253, [%SP+168]; | |
$L__tmp491: | |
//test_v7_cuda.cu:63 crossProduct(ri, a[i], b[i]); | |
.loc 18 63 28 | |
st.u64 [%SP+648], %rd253; | |
st.u64 [%SP+640], %rd252; | |
st.u64 [%SP+632], %rd251; | |
st.u64 [%SP+624], %rd250; | |
st.u64 [%SP+616], %rd249; | |
st.u64 [%SP+608], %rd248; | |
add.u64 %rd254, %SP, 608; | |
mov.b64 %rd255, %rd254; | |
st.u64 [%SP+656], %rd255; | |
ld.u64 %rd256, [%SP+656]; | |
ld.u64 %rd257, [%SP+600]; | |
ld.u64 %rd258, [%SP+656]; | |
add.u64 %rd259, %SP, 664; | |
mov.b64 %rd260, %rd259; | |
st.u64 [%SP+176], %rd260; | |
mov.b64 %rd261, %rd257; | |
st.u64 [%SP+184], %rd261; | |
mov.b64 %rd262, %rd258; | |
st.u64 [%SP+192], %rd262; | |
//test_v7_cuda.cu:63 crossProduct(ri, a[i], b[i]); | |
.loc 18 63 5 | |
bra.uni $L__tmp492; | |
$L__tmp492: | |
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y; | |
.loc 18 51 5 | |
ld.u64 %rd263, [%SP+184]; | |
add.s64 %rd5, %rd263, 8; | |
$L__tmp493: | |
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y; | |
.loc 18 51 5 | |
bra.uni $L__tmp494; | |
$L__tmp494: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
setp.ne.s64 %p21, %rd5, 0; | |
not.pred %p22, %p21; | |
not.pred %p23, %p22; | |
@%p23 bra $L__BB5_16; | |
bra.uni $L__BB5_15; | |
$L__BB5_15: | |
mov.u32 %r18, 0; | |
mov.b32 %r19, %r18; | |
bra.uni $L__BB5_16; | |
$L__BB5_16: | |
ld.u64 %rd264, [%rd5]; | |
mov.b64 %rd265, %rd264; | |
$L__tmp495: | |
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y; | |
.loc 18 51 5 | |
ld.f64 %fd1, [%rd265]; | |
ld.u64 %rd266, [%SP+192]; | |
add.s64 %rd6, %rd266, 16; | |
$L__tmp496: | |
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y; | |
.loc 18 51 5 | |
bra.uni $L__tmp497; | |
$L__tmp497: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
setp.ne.s64 %p24, %rd6, 0; | |
not.pred %p25, %p24; | |
not.pred %p26, %p25; | |
@%p26 bra $L__BB5_18; | |
bra.uni $L__BB5_17; | |
$L__BB5_17: | |
mov.u32 %r20, 0; | |
mov.b32 %r21, %r20; | |
bra.uni $L__BB5_18; | |
$L__BB5_18: | |
ld.u64 %rd267, [%rd6]; | |
mov.b64 %rd268, %rd267; | |
$L__tmp498: | |
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y; | |
.loc 18 51 5 | |
ld.f64 %fd10, [%rd268]; | |
mul.f64 %fd2, %fd1, %fd10; | |
ld.u64 %rd269, [%SP+184]; | |
add.s64 %rd7, %rd269, 16; | |
$L__tmp499: | |
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y; | |
.loc 18 51 5 | |
bra.uni $L__tmp500; | |
$L__tmp500: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
setp.ne.s64 %p27, %rd7, 0; | |
not.pred %p28, %p27; | |
not.pred %p29, %p28; | |
@%p29 bra $L__BB5_20; | |
bra.uni $L__BB5_19; | |
$L__BB5_19: | |
mov.u32 %r22, 0; | |
mov.b32 %r23, %r22; | |
bra.uni $L__BB5_20; | |
$L__BB5_20: | |
ld.u64 %rd270, [%rd7]; | |
mov.b64 %rd271, %rd270; | |
$L__tmp501: | |
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y; | |
.loc 18 51 5 | |
ld.f64 %fd3, [%rd271]; | |
ld.u64 %rd272, [%SP+192]; | |
add.s64 %rd8, %rd272, 8; | |
$L__tmp502: | |
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y; | |
.loc 18 51 5 | |
bra.uni $L__tmp503; | |
$L__tmp503: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
setp.ne.s64 %p30, %rd8, 0; | |
not.pred %p31, %p30; | |
not.pred %p32, %p31; | |
@%p32 bra $L__BB5_22; | |
bra.uni $L__BB5_21; | |
$L__BB5_21: | |
mov.u32 %r24, 0; | |
mov.b32 %r25, %r24; | |
bra.uni $L__BB5_22; | |
$L__BB5_22: | |
ld.u64 %rd273, [%rd8]; | |
mov.b64 %rd274, %rd273; | |
$L__tmp504: | |
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y; | |
.loc 18 51 5 | |
ld.f64 %fd11, [%rd274]; | |
mul.f64 %fd12, %fd3, %fd11; | |
sub.f64 %fd13, %fd2, %fd12; | |
st.f64 [%SP+200], %fd13; | |
add.u64 %rd275, %SP, 200; | |
mov.b64 %rd276, %rd275; | |
ld.u64 %rd9, [%SP+176]; | |
mov.b64 %rd10, %rd276; | |
$L__tmp505: | |
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y; | |
.loc 18 51 5 | |
bra.uni $L__tmp506; | |
$L__tmp506: | |
//soa_v7.h:35 SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return val_ = v; } | |
.loc 12 35 120 | |
setp.ne.s64 %p33, %rd9, 0; | |
not.pred %p34, %p33; | |
not.pred %p35, %p34; | |
@%p35 bra $L__BB5_24; | |
bra.uni $L__BB5_23; | |
$L__BB5_23: | |
mov.u32 %r26, 0; | |
mov.b32 %r27, %r26; | |
bra.uni $L__BB5_24; | |
$L__BB5_24: | |
ld.f64 %fd14, [%rd10]; | |
ld.u64 %rd277, [%rd9]; | |
mov.b64 %rd278, %rd277; | |
st.f64 [%rd278], %fd14; | |
mov.b64 %rd279, %rd278; | |
$L__tmp507: | |
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z; | |
.loc 18 52 5 | |
ld.u64 %rd280, [%SP+184]; | |
add.s64 %rd11, %rd280, 16; | |
$L__tmp508: | |
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z; | |
.loc 18 52 5 | |
bra.uni $L__tmp509; | |
$L__tmp509: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
setp.ne.s64 %p36, %rd11, 0; | |
not.pred %p37, %p36; | |
not.pred %p38, %p37; | |
@%p38 bra $L__BB5_26; | |
bra.uni $L__BB5_25; | |
$L__BB5_25: | |
mov.u32 %r28, 0; | |
mov.b32 %r29, %r28; | |
bra.uni $L__BB5_26; | |
$L__BB5_26: | |
ld.u64 %rd281, [%rd11]; | |
mov.b64 %rd282, %rd281; | |
$L__tmp510: | |
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z; | |
.loc 18 52 5 | |
ld.f64 %fd4, [%rd282]; | |
ld.u64 %rd12, [%SP+192]; | |
$L__tmp511: | |
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z; | |
.loc 18 52 5 | |
bra.uni $L__tmp512; | |
$L__tmp512: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
setp.ne.s64 %p39, %rd12, 0; | |
not.pred %p40, %p39; | |
not.pred %p41, %p40; | |
@%p41 bra $L__BB5_28; | |
bra.uni $L__BB5_27; | |
$L__BB5_27: | |
mov.u32 %r30, 0; | |
mov.b32 %r31, %r30; | |
bra.uni $L__BB5_28; | |
$L__BB5_28: | |
ld.u64 %rd283, [%rd12]; | |
mov.b64 %rd284, %rd283; | |
$L__tmp513: | |
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z; | |
.loc 18 52 5 | |
ld.f64 %fd15, [%rd284]; | |
mul.f64 %fd5, %fd4, %fd15; | |
ld.u64 %rd13, [%SP+184]; | |
$L__tmp514: | |
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z; | |
.loc 18 52 5 | |
bra.uni $L__tmp515; | |
$L__tmp515: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
setp.ne.s64 %p42, %rd13, 0; | |
not.pred %p43, %p42; | |
not.pred %p44, %p43; | |
@%p44 bra $L__BB5_30; | |
bra.uni $L__BB5_29; | |
$L__BB5_29: | |
mov.u32 %r32, 0; | |
mov.b32 %r33, %r32; | |
bra.uni $L__BB5_30; | |
$L__BB5_30: | |
ld.u64 %rd285, [%rd13]; | |
mov.b64 %rd286, %rd285; | |
$L__tmp516: | |
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z; | |
.loc 18 52 5 | |
ld.f64 %fd6, [%rd286]; | |
ld.u64 %rd287, [%SP+192]; | |
add.s64 %rd14, %rd287, 16; | |
$L__tmp517: | |
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z; | |
.loc 18 52 5 | |
bra.uni $L__tmp518; | |
$L__tmp518: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
setp.ne.s64 %p45, %rd14, 0; | |
not.pred %p46, %p45; | |
not.pred %p47, %p46; | |
@%p47 bra $L__BB5_32; | |
bra.uni $L__BB5_31; | |
$L__BB5_31: | |
mov.u32 %r34, 0; | |
mov.b32 %r35, %r34; | |
bra.uni $L__BB5_32; | |
$L__BB5_32: | |
ld.u64 %rd288, [%rd14]; | |
mov.b64 %rd289, %rd288; | |
$L__tmp519: | |
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z; | |
.loc 18 52 5 | |
ld.f64 %fd16, [%rd289]; | |
mul.f64 %fd17, %fd6, %fd16; | |
sub.f64 %fd18, %fd5, %fd17; | |
st.f64 [%SP+208], %fd18; | |
add.u64 %rd290, %SP, 208; | |
mov.b64 %rd291, %rd290; | |
ld.u64 %rd292, [%SP+176]; | |
add.s64 %rd15, %rd292, 8; | |
$L__tmp520: | |
.loc 18 0 5 | |
mov.b64 %rd16, %rd291; | |
$L__tmp521: | |
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z; | |
.loc 18 52 5 | |
bra.uni $L__tmp522; | |
$L__tmp522: | |
//soa_v7.h:35 SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return val_ = v; } | |
.loc 12 35 120 | |
setp.ne.s64 %p48, %rd15, 0; | |
not.pred %p49, %p48; | |
not.pred %p50, %p49; | |
@%p50 bra $L__BB5_34; | |
bra.uni $L__BB5_33; | |
$L__BB5_33: | |
mov.u32 %r36, 0; | |
mov.b32 %r37, %r36; | |
bra.uni $L__BB5_34; | |
$L__BB5_34: | |
ld.f64 %fd19, [%rd16]; | |
ld.u64 %rd293, [%rd15]; | |
mov.b64 %rd294, %rd293; | |
st.f64 [%rd294], %fd19; | |
mov.b64 %rd295, %rd294; | |
$L__tmp523: | |
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x; | |
.loc 18 53 5 | |
ld.u64 %rd17, [%SP+184]; | |
$L__tmp524: | |
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x; | |
.loc 18 53 5 | |
bra.uni $L__tmp525; | |
$L__tmp525: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
setp.ne.s64 %p51, %rd17, 0; | |
not.pred %p52, %p51; | |
not.pred %p53, %p52; | |
@%p53 bra $L__BB5_36; | |
bra.uni $L__BB5_35; | |
$L__BB5_35: | |
mov.u32 %r38, 0; | |
mov.b32 %r39, %r38; | |
bra.uni $L__BB5_36; | |
$L__BB5_36: | |
ld.u64 %rd296, [%rd17]; | |
mov.b64 %rd297, %rd296; | |
$L__tmp526: | |
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x; | |
.loc 18 53 5 | |
ld.f64 %fd7, [%rd297]; | |
ld.u64 %rd298, [%SP+192]; | |
add.s64 %rd18, %rd298, 8; | |
$L__tmp527: | |
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x; | |
.loc 18 53 5 | |
bra.uni $L__tmp528; | |
$L__tmp528: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
setp.ne.s64 %p54, %rd18, 0; | |
not.pred %p55, %p54; | |
not.pred %p56, %p55; | |
@%p56 bra $L__BB5_38; | |
bra.uni $L__BB5_37; | |
$L__BB5_37: | |
mov.u32 %r40, 0; | |
mov.b32 %r41, %r40; | |
bra.uni $L__BB5_38; | |
$L__BB5_38: | |
ld.u64 %rd299, [%rd18]; | |
mov.b64 %rd300, %rd299; | |
$L__tmp529: | |
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x; | |
.loc 18 53 5 | |
ld.f64 %fd20, [%rd300]; | |
mul.f64 %fd8, %fd7, %fd20; | |
ld.u64 %rd301, [%SP+184]; | |
add.s64 %rd19, %rd301, 8; | |
$L__tmp530: | |
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x; | |
.loc 18 53 5 | |
bra.uni $L__tmp531; | |
$L__tmp531: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
setp.ne.s64 %p57, %rd19, 0; | |
not.pred %p58, %p57; | |
not.pred %p59, %p58; | |
@%p59 bra $L__BB5_40; | |
bra.uni $L__BB5_39; | |
$L__BB5_39: | |
mov.u32 %r42, 0; | |
mov.b32 %r43, %r42; | |
bra.uni $L__BB5_40; | |
$L__BB5_40: | |
ld.u64 %rd302, [%rd19]; | |
mov.b64 %rd303, %rd302; | |
$L__tmp532: | |
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x; | |
.loc 18 53 5 | |
ld.f64 %fd9, [%rd303]; | |
ld.u64 %rd20, [%SP+192]; | |
$L__tmp533: | |
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x; | |
.loc 18 53 5 | |
bra.uni $L__tmp534; | |
$L__tmp534: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
setp.ne.s64 %p60, %rd20, 0; | |
not.pred %p61, %p60; | |
not.pred %p62, %p61; | |
@%p62 bra $L__BB5_42; | |
bra.uni $L__BB5_41; | |
$L__BB5_41: | |
mov.u32 %r44, 0; | |
mov.b32 %r45, %r44; | |
bra.uni $L__BB5_42; | |
$L__BB5_42: | |
ld.u64 %rd304, [%rd20]; | |
mov.b64 %rd305, %rd304; | |
$L__tmp535: | |
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x; | |
.loc 18 53 5 | |
ld.f64 %fd21, [%rd305]; | |
mul.f64 %fd22, %fd9, %fd21; | |
sub.f64 %fd23, %fd8, %fd22; | |
st.f64 [%SP+216], %fd23; | |
add.u64 %rd306, %SP, 216; | |
mov.b64 %rd307, %rd306; | |
ld.u64 %rd308, [%SP+176]; | |
add.s64 %rd21, %rd308, 16; | |
$L__tmp536: | |
.loc 18 0 5 | |
mov.b64 %rd22, %rd307; | |
$L__tmp537: | |
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x; | |
.loc 18 53 5 | |
bra.uni $L__tmp538; | |
$L__tmp538: | |
//soa_v7.h:35 SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return val_ = v; } | |
.loc 12 35 120 | |
setp.ne.s64 %p63, %rd21, 0; | |
not.pred %p64, %p63; | |
not.pred %p65, %p64; | |
@%p65 bra $L__BB5_44; | |
bra.uni $L__BB5_43; | |
$L__BB5_43: | |
mov.u32 %r46, 0; | |
mov.b32 %r47, %r46; | |
bra.uni $L__BB5_44; | |
$L__BB5_44: | |
ld.f64 %fd24, [%rd22]; | |
ld.u64 %rd309, [%rd21]; | |
mov.b64 %rd310, %rd309; | |
st.f64 [%rd310], %fd24; | |
mov.b64 %rd311, %rd310; | |
$L__tmp539: | |
//test_v7_cuda.cu:64 } | |
.loc 18 64 3 | |
bra.uni $L__BB5_45; | |
$L__BB5_45: | |
ret; | |
$L__tmp540: | |
$L__func_end5: | |
} | |
.entry _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m( | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0[80], | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1[80], | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2[80], | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3 | |
) | |
{ | |
.local .align 8 .b8 __local_depot6[2544]; | |
.reg .b64 %SP; | |
.reg .b64 %SPL; | |
.reg .pred %p<138>; | |
.reg .b32 %r<96>; | |
.reg .f64 %fd<25>; | |
.reg .b64 %rd<1155>; | |
//test_v7_cuda.cu:67 __global__ void directCrossProductSoA(testSoA::SoA r, const testSoA::SoA a, const testSoA::SoA b, size_t nElements) { | |
.loc 18 67 0 | |
$L__func_begin6: | |
//test_v7_cuda.cu:67 __global__ void directCrossProductSoA(testSoA::SoA r, const testSoA::SoA a, const testSoA::SoA b, size_t nElements) { | |
.loc 18 67 0 | |
mov.u64 %SPL, __local_depot6; | |
cvta.local.u64 %SP, %SPL; | |
ld.param.u64 %rd44, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+8]; | |
ld.param.u64 %rd45, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+16]; | |
ld.param.u64 %rd46, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+24]; | |
ld.param.u64 %rd47, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+32]; | |
ld.param.u64 %rd48, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+40]; | |
ld.param.u64 %rd49, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+48]; | |
ld.param.u64 %rd50, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+56]; | |
ld.param.u64 %rd51, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+64]; | |
ld.param.u64 %rd52, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+72]; | |
ld.param.u64 %rd34, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+8]; | |
ld.param.u64 %rd35, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+16]; | |
ld.param.u64 %rd36, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+24]; | |
ld.param.u64 %rd37, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+32]; | |
ld.param.u64 %rd38, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+40]; | |
ld.param.u64 %rd39, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+48]; | |
ld.param.u64 %rd40, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+56]; | |
ld.param.u64 %rd41, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+64]; | |
ld.param.u64 %rd42, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+72]; | |
ld.param.u64 %rd24, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+8]; | |
ld.param.u64 %rd25, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+16]; | |
ld.param.u64 %rd26, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+24]; | |
ld.param.u64 %rd27, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+32]; | |
ld.param.u64 %rd28, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+40]; | |
ld.param.u64 %rd29, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+48]; | |
ld.param.u64 %rd30, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+56]; | |
ld.param.u64 %rd31, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+64]; | |
ld.param.u64 %rd32, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+72]; | |
ld.param.u64 %rd53, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3]; | |
ld.param.u64 %rd43, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2]; | |
ld.param.u64 %rd33, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1]; | |
ld.param.u64 %rd23, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0]; | |
st.u64 [%SP+1512], %rd32; | |
st.u64 [%SP+1504], %rd31; | |
st.u64 [%SP+1496], %rd30; | |
st.u64 [%SP+1488], %rd29; | |
st.u64 [%SP+1480], %rd28; | |
st.u64 [%SP+1472], %rd27; | |
st.u64 [%SP+1464], %rd26; | |
st.u64 [%SP+1456], %rd25; | |
st.u64 [%SP+1448], %rd24; | |
st.u64 [%SP+1440], %rd23; | |
st.u64 [%SP+1592], %rd42; | |
st.u64 [%SP+1584], %rd41; | |
st.u64 [%SP+1576], %rd40; | |
st.u64 [%SP+1568], %rd39; | |
st.u64 [%SP+1560], %rd38; | |
st.u64 [%SP+1552], %rd37; | |
st.u64 [%SP+1544], %rd36; | |
st.u64 [%SP+1536], %rd35; | |
st.u64 [%SP+1528], %rd34; | |
st.u64 [%SP+1520], %rd33; | |
st.u64 [%SP+1672], %rd52; | |
st.u64 [%SP+1664], %rd51; | |
st.u64 [%SP+1656], %rd50; | |
st.u64 [%SP+1648], %rd49; | |
st.u64 [%SP+1640], %rd48; | |
st.u64 [%SP+1632], %rd47; | |
st.u64 [%SP+1624], %rd46; | |
st.u64 [%SP+1616], %rd45; | |
st.u64 [%SP+1608], %rd44; | |
st.u64 [%SP+1600], %rd43; | |
$L__tmp541: | |
//test_v7_cuda.cu:68 size_t i = blockIdx.x * blockDim.x + threadIdx.x; | |
.loc 18 68 14 | |
mov.u32 %r1, %ctaid.x; | |
mov.u32 %r2, %ntid.x; | |
mul.lo.s32 %r3, %r1, %r2; | |
mov.u32 %r4, %tid.x; | |
add.s32 %r5, %r3, %r4; | |
cvt.u64.u32 %rd1, %r5; | |
$L__tmp542: | |
//test_v7_cuda.cu:69 if (i >= nElements) return; | |
.loc 18 69 5 | |
setp.ge.u64 %p1, %rd1, %rd53; | |
not.pred %p2, %p1; | |
@%p2 bra $L__BB6_2; | |
bra.uni $L__BB6_1; | |
$L__BB6_1: | |
$L__tmp543: | |
//test_v7_cuda.cu:69 if (i >= nElements) return; | |
.loc 18 69 25 | |
bra.uni $L__BB6_93; | |
$L__tmp544: | |
$L__BB6_2: | |
.loc 18 0 25 | |
add.u64 %rd54, %SP, 1520; | |
mov.b64 %rd55, %rd54; | |
st.u64 [%SP+1384], %rd55; | |
mov.b64 %rd2, %rd1; | |
$L__tmp545: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 14 | |
bra.uni $L__tmp546; | |
$L__tmp546: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd56, [%SP+1384]; | |
mov.b64 %rd57, %rd56; | |
st.u64 [%SP+1376], %rd57; | |
mov.b64 %rd58, %rd2; | |
$L__tmp547: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp548; | |
$L__tmp548: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd59, [%SP+1376]; | |
setp.ne.s64 %p3, %rd59, 0; | |
not.pred %p4, %p3; | |
not.pred %p5, %p4; | |
@%p5 bra $L__BB6_4; | |
bra.uni $L__BB6_3; | |
$L__BB6_3: | |
mov.u32 %r6, 0; | |
mov.b32 %r7, %r6; | |
bra.uni $L__BB6_4; | |
$L__tmp549: | |
$L__BB6_4: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd60, [%SP+1384]; | |
ld.u64 %rd61, [%rd60+24]; | |
ld.u64 %rd62, [%SP+1384]; | |
ld.u64 %rd63, [%rd62+32]; | |
ld.u64 %rd64, [%SP+1384]; | |
ld.u64 %rd65, [%rd64+40]; | |
ld.u64 %rd66, [%SP+1384]; | |
ld.u64 %rd67, [%rd66+48]; | |
ld.u64 %rd68, [%SP+1384]; | |
ld.u64 %rd69, [%rd68+56]; | |
ld.u64 %rd70, [%SP+1384]; | |
ld.u64 %rd71, [%rd70+64]; | |
add.u64 %rd72, %SP, 1392; | |
mov.b64 %rd73, %rd72; | |
st.u64 [%SP+1360], %rd73; | |
mov.b64 %rd74, %rd2; | |
$L__tmp550: | |
.loc 11 0 6996 | |
mov.b64 %rd75, %rd61; | |
$L__tmp551: | |
mov.b64 %rd76, %rd63; | |
$L__tmp552: | |
mov.b64 %rd77, %rd65; | |
$L__tmp553: | |
mov.b64 %rd78, %rd67; | |
$L__tmp554: | |
mov.b64 %rd79, %rd69; | |
$L__tmp555: | |
mov.b64 %rd80, %rd71; | |
st.u64 [%SP+1368], %rd80; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp556; | |
$L__tmp556: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd81, [%SP+1360]; | |
$L__tmp557: | |
.loc 11 0 5739 | |
mov.b64 %rd82, %rd74; | |
$L__tmp558: | |
mov.b64 %rd83, %rd75; | |
$L__tmp559: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp560; | |
$L__tmp560: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd84, %rd82, 3; | |
add.s64 %rd85, %rd83, %rd84; | |
st.u64 [%rd81], %rd85; | |
$L__tmp561: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd86, [%SP+1360]; | |
mov.b64 %rd87, %rd74; | |
$L__tmp562: | |
.loc 11 0 5752 | |
mov.b64 %rd88, %rd76; | |
$L__tmp563: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp564; | |
$L__tmp564: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd89, %rd87, 3; | |
add.s64 %rd90, %rd88, %rd89; | |
st.u64 [%rd86+8], %rd90; | |
$L__tmp565: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd91, [%SP+1360]; | |
mov.b64 %rd92, %rd74; | |
$L__tmp566: | |
.loc 11 0 5765 | |
mov.b64 %rd93, %rd77; | |
$L__tmp567: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp568; | |
$L__tmp568: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd94, %rd92, 3; | |
add.s64 %rd95, %rd93, %rd94; | |
st.u64 [%rd91+16], %rd95; | |
$L__tmp569: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd96, [%SP+1360]; | |
mov.b64 %rd97, %rd74; | |
$L__tmp570: | |
.loc 11 0 5778 | |
mov.b64 %rd98, %rd78; | |
$L__tmp571: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp572; | |
$L__tmp572: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd99, %rd97, 1; | |
add.s64 %rd100, %rd98, %rd99; | |
st.u64 [%rd96+24], %rd100; | |
$L__tmp573: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd101, [%SP+1360]; | |
mov.b64 %rd102, %rd74; | |
$L__tmp574: | |
.loc 11 0 5801 | |
mov.b64 %rd103, %rd79; | |
$L__tmp575: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp576; | |
$L__tmp576: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd104, %rd102, 2; | |
add.s64 %rd105, %rd103, %rd104; | |
st.u64 [%rd101+32], %rd105; | |
$L__tmp577: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd106, [%SP+1360]; | |
ld.u64 %rd107, [%SP+1368]; | |
mov.b64 %rd108, %rd74; | |
$L__tmp578: | |
.loc 11 0 5822 | |
mov.b64 %rd109, %rd107; | |
st.u64 [%SP+1352], %rd109; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp579; | |
$L__tmp579: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd110, [%SP+1352]; | |
shl.b64 %rd111, %rd108, 3; | |
add.s64 %rd112, %rd110, %rd111; | |
st.u64 [%rd106+40], %rd112; | |
$L__tmp580: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd113, [%SP+1384]; | |
setp.ne.s64 %p6, %rd113, 0; | |
not.pred %p7, %p6; | |
not.pred %p8, %p7; | |
@%p8 bra $L__BB6_6; | |
bra.uni $L__BB6_5; | |
$L__BB6_5: | |
mov.u32 %r8, 0; | |
mov.b32 %r9, %r8; | |
bra.uni $L__BB6_6; | |
$L__BB6_6: | |
ld.u64 %rd114, [%SP+1392]; | |
ld.u64 %rd115, [%SP+1400]; | |
ld.u64 %rd116, [%SP+1408]; | |
ld.u64 %rd117, [%SP+1416]; | |
ld.u64 %rd118, [%SP+1424]; | |
ld.u64 %rd119, [%SP+1432]; | |
$L__tmp581: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 14 | |
st.u64 [%SP+1864], %rd119; | |
st.u64 [%SP+1856], %rd118; | |
st.u64 [%SP+1848], %rd117; | |
st.u64 [%SP+1840], %rd116; | |
st.u64 [%SP+1832], %rd115; | |
st.u64 [%SP+1824], %rd114; | |
ld.u64 %rd120, [%SP+1832]; | |
st.u64 [%SP+1696], %rd120; | |
add.u64 %rd121, %SP, 1696; | |
mov.b64 %rd122, %rd121; | |
st.u64 [%SP+0], %rd122; | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 14 | |
bra.uni $L__tmp582; | |
$L__tmp582: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
ld.u64 %rd123, [%SP+0]; | |
setp.ne.s64 %p9, %rd123, 0; | |
not.pred %p10, %p9; | |
not.pred %p11, %p10; | |
@%p11 bra $L__BB6_8; | |
bra.uni $L__BB6_7; | |
$L__BB6_7: | |
mov.u32 %r10, 0; | |
mov.b32 %r11, %r10; | |
bra.uni $L__BB6_8; | |
$L__BB6_8: | |
ld.u64 %rd124, [%SP+0]; | |
ld.u64 %rd125, [%rd124]; | |
mov.b64 %rd126, %rd125; | |
$L__tmp583: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 14 | |
ld.f64 %fd1, [%rd126]; | |
add.u64 %rd127, %SP, 1600; | |
mov.b64 %rd128, %rd127; | |
st.u64 [%SP+40], %rd128; | |
mov.b64 %rd3, %rd1; | |
$L__tmp584: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 23 | |
bra.uni $L__tmp585; | |
$L__tmp585: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd129, [%SP+40]; | |
mov.b64 %rd130, %rd129; | |
st.u64 [%SP+32], %rd130; | |
mov.b64 %rd131, %rd3; | |
$L__tmp586: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp587; | |
$L__tmp587: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd132, [%SP+32]; | |
setp.ne.s64 %p12, %rd132, 0; | |
not.pred %p13, %p12; | |
not.pred %p14, %p13; | |
@%p14 bra $L__BB6_10; | |
bra.uni $L__BB6_9; | |
$L__BB6_9: | |
mov.u32 %r12, 0; | |
mov.b32 %r13, %r12; | |
bra.uni $L__BB6_10; | |
$L__tmp588: | |
$L__BB6_10: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd133, [%SP+40]; | |
ld.u64 %rd134, [%rd133+24]; | |
ld.u64 %rd135, [%SP+40]; | |
ld.u64 %rd136, [%rd135+32]; | |
ld.u64 %rd137, [%SP+40]; | |
ld.u64 %rd138, [%rd137+40]; | |
ld.u64 %rd139, [%SP+40]; | |
ld.u64 %rd140, [%rd139+48]; | |
ld.u64 %rd141, [%SP+40]; | |
ld.u64 %rd142, [%rd141+56]; | |
ld.u64 %rd143, [%SP+40]; | |
ld.u64 %rd144, [%rd143+64]; | |
add.u64 %rd145, %SP, 48; | |
mov.b64 %rd146, %rd145; | |
st.u64 [%SP+16], %rd146; | |
mov.b64 %rd147, %rd3; | |
$L__tmp589: | |
.loc 11 0 6996 | |
mov.b64 %rd148, %rd134; | |
$L__tmp590: | |
mov.b64 %rd149, %rd136; | |
$L__tmp591: | |
mov.b64 %rd150, %rd138; | |
$L__tmp592: | |
mov.b64 %rd151, %rd140; | |
$L__tmp593: | |
mov.b64 %rd152, %rd142; | |
$L__tmp594: | |
mov.b64 %rd153, %rd144; | |
st.u64 [%SP+24], %rd153; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp595; | |
$L__tmp595: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd154, [%SP+16]; | |
$L__tmp596: | |
.loc 11 0 5739 | |
mov.b64 %rd155, %rd147; | |
$L__tmp597: | |
mov.b64 %rd156, %rd148; | |
$L__tmp598: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp599; | |
$L__tmp599: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd157, %rd155, 3; | |
add.s64 %rd158, %rd156, %rd157; | |
st.u64 [%rd154], %rd158; | |
$L__tmp600: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd159, [%SP+16]; | |
mov.b64 %rd160, %rd147; | |
$L__tmp601: | |
.loc 11 0 5752 | |
mov.b64 %rd161, %rd149; | |
$L__tmp602: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp603; | |
$L__tmp603: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd162, %rd160, 3; | |
add.s64 %rd163, %rd161, %rd162; | |
st.u64 [%rd159+8], %rd163; | |
$L__tmp604: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd164, [%SP+16]; | |
mov.b64 %rd165, %rd147; | |
$L__tmp605: | |
.loc 11 0 5765 | |
mov.b64 %rd166, %rd150; | |
$L__tmp606: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp607; | |
$L__tmp607: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd167, %rd165, 3; | |
add.s64 %rd168, %rd166, %rd167; | |
st.u64 [%rd164+16], %rd168; | |
$L__tmp608: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd169, [%SP+16]; | |
mov.b64 %rd170, %rd147; | |
$L__tmp609: | |
.loc 11 0 5778 | |
mov.b64 %rd171, %rd151; | |
$L__tmp610: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp611; | |
$L__tmp611: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd172, %rd170, 1; | |
add.s64 %rd173, %rd171, %rd172; | |
st.u64 [%rd169+24], %rd173; | |
$L__tmp612: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd174, [%SP+16]; | |
mov.b64 %rd175, %rd147; | |
$L__tmp613: | |
.loc 11 0 5801 | |
mov.b64 %rd176, %rd152; | |
$L__tmp614: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp615; | |
$L__tmp615: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd177, %rd175, 2; | |
add.s64 %rd178, %rd176, %rd177; | |
st.u64 [%rd174+32], %rd178; | |
$L__tmp616: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd179, [%SP+16]; | |
ld.u64 %rd180, [%SP+24]; | |
mov.b64 %rd181, %rd147; | |
$L__tmp617: | |
.loc 11 0 5822 | |
mov.b64 %rd182, %rd180; | |
st.u64 [%SP+8], %rd182; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp618; | |
$L__tmp618: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd183, [%SP+8]; | |
shl.b64 %rd184, %rd181, 3; | |
add.s64 %rd185, %rd183, %rd184; | |
st.u64 [%rd179+40], %rd185; | |
$L__tmp619: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd186, [%SP+40]; | |
setp.ne.s64 %p15, %rd186, 0; | |
not.pred %p16, %p15; | |
not.pred %p17, %p16; | |
@%p17 bra $L__BB6_12; | |
bra.uni $L__BB6_11; | |
$L__BB6_11: | |
mov.u32 %r14, 0; | |
mov.b32 %r15, %r14; | |
bra.uni $L__BB6_12; | |
$L__BB6_12: | |
ld.u64 %rd187, [%SP+48]; | |
ld.u64 %rd188, [%SP+56]; | |
ld.u64 %rd189, [%SP+64]; | |
ld.u64 %rd190, [%SP+72]; | |
ld.u64 %rd191, [%SP+80]; | |
ld.u64 %rd192, [%SP+88]; | |
$L__tmp620: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 23 | |
st.u64 [%SP+1912], %rd192; | |
st.u64 [%SP+1904], %rd191; | |
st.u64 [%SP+1896], %rd190; | |
st.u64 [%SP+1888], %rd189; | |
st.u64 [%SP+1880], %rd188; | |
st.u64 [%SP+1872], %rd187; | |
ld.u64 %rd193, [%SP+1888]; | |
st.u64 [%SP+1704], %rd193; | |
add.u64 %rd194, %SP, 1704; | |
mov.b64 %rd195, %rd194; | |
st.u64 [%SP+96], %rd195; | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 23 | |
bra.uni $L__tmp621; | |
$L__tmp621: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
ld.u64 %rd196, [%SP+96]; | |
setp.ne.s64 %p18, %rd196, 0; | |
not.pred %p19, %p18; | |
not.pred %p20, %p19; | |
@%p20 bra $L__BB6_14; | |
bra.uni $L__BB6_13; | |
$L__BB6_13: | |
mov.u32 %r16, 0; | |
mov.b32 %r17, %r16; | |
bra.uni $L__BB6_14; | |
$L__BB6_14: | |
ld.u64 %rd197, [%SP+96]; | |
ld.u64 %rd198, [%rd197]; | |
mov.b64 %rd199, %rd198; | |
$L__tmp622: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 23 | |
ld.f64 %fd10, [%rd199]; | |
mul.f64 %fd2, %fd1, %fd10; | |
add.u64 %rd200, %SP, 1520; | |
mov.b64 %rd201, %rd200; | |
st.u64 [%SP+136], %rd201; | |
mov.b64 %rd4, %rd1; | |
$L__tmp623: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 32 | |
bra.uni $L__tmp624; | |
$L__tmp624: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd202, [%SP+136]; | |
mov.b64 %rd203, %rd202; | |
st.u64 [%SP+128], %rd203; | |
mov.b64 %rd204, %rd4; | |
$L__tmp625: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp626; | |
$L__tmp626: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd205, [%SP+128]; | |
setp.ne.s64 %p21, %rd205, 0; | |
not.pred %p22, %p21; | |
not.pred %p23, %p22; | |
@%p23 bra $L__BB6_16; | |
bra.uni $L__BB6_15; | |
$L__BB6_15: | |
mov.u32 %r18, 0; | |
mov.b32 %r19, %r18; | |
bra.uni $L__BB6_16; | |
$L__tmp627: | |
$L__BB6_16: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd206, [%SP+136]; | |
ld.u64 %rd207, [%rd206+24]; | |
ld.u64 %rd208, [%SP+136]; | |
ld.u64 %rd209, [%rd208+32]; | |
ld.u64 %rd210, [%SP+136]; | |
ld.u64 %rd211, [%rd210+40]; | |
ld.u64 %rd212, [%SP+136]; | |
ld.u64 %rd213, [%rd212+48]; | |
ld.u64 %rd214, [%SP+136]; | |
ld.u64 %rd215, [%rd214+56]; | |
ld.u64 %rd216, [%SP+136]; | |
ld.u64 %rd217, [%rd216+64]; | |
add.u64 %rd218, %SP, 144; | |
mov.b64 %rd219, %rd218; | |
st.u64 [%SP+112], %rd219; | |
mov.b64 %rd220, %rd4; | |
$L__tmp628: | |
.loc 11 0 6996 | |
mov.b64 %rd221, %rd207; | |
$L__tmp629: | |
mov.b64 %rd222, %rd209; | |
$L__tmp630: | |
mov.b64 %rd223, %rd211; | |
$L__tmp631: | |
mov.b64 %rd224, %rd213; | |
$L__tmp632: | |
mov.b64 %rd225, %rd215; | |
$L__tmp633: | |
mov.b64 %rd226, %rd217; | |
st.u64 [%SP+120], %rd226; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp634; | |
$L__tmp634: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd227, [%SP+112]; | |
$L__tmp635: | |
.loc 11 0 5739 | |
mov.b64 %rd228, %rd220; | |
$L__tmp636: | |
mov.b64 %rd229, %rd221; | |
$L__tmp637: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp638; | |
$L__tmp638: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd230, %rd228, 3; | |
add.s64 %rd231, %rd229, %rd230; | |
st.u64 [%rd227], %rd231; | |
$L__tmp639: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd232, [%SP+112]; | |
mov.b64 %rd233, %rd220; | |
$L__tmp640: | |
.loc 11 0 5752 | |
mov.b64 %rd234, %rd222; | |
$L__tmp641: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp642; | |
$L__tmp642: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd235, %rd233, 3; | |
add.s64 %rd236, %rd234, %rd235; | |
st.u64 [%rd232+8], %rd236; | |
$L__tmp643: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd237, [%SP+112]; | |
mov.b64 %rd238, %rd220; | |
$L__tmp644: | |
.loc 11 0 5765 | |
mov.b64 %rd239, %rd223; | |
$L__tmp645: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp646; | |
$L__tmp646: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd240, %rd238, 3; | |
add.s64 %rd241, %rd239, %rd240; | |
st.u64 [%rd237+16], %rd241; | |
$L__tmp647: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd242, [%SP+112]; | |
mov.b64 %rd243, %rd220; | |
$L__tmp648: | |
.loc 11 0 5778 | |
mov.b64 %rd244, %rd224; | |
$L__tmp649: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp650; | |
$L__tmp650: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd245, %rd243, 1; | |
add.s64 %rd246, %rd244, %rd245; | |
st.u64 [%rd242+24], %rd246; | |
$L__tmp651: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd247, [%SP+112]; | |
mov.b64 %rd248, %rd220; | |
$L__tmp652: | |
.loc 11 0 5801 | |
mov.b64 %rd249, %rd225; | |
$L__tmp653: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp654; | |
$L__tmp654: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd250, %rd248, 2; | |
add.s64 %rd251, %rd249, %rd250; | |
st.u64 [%rd247+32], %rd251; | |
$L__tmp655: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd252, [%SP+112]; | |
ld.u64 %rd253, [%SP+120]; | |
mov.b64 %rd254, %rd220; | |
$L__tmp656: | |
.loc 11 0 5822 | |
mov.b64 %rd255, %rd253; | |
st.u64 [%SP+104], %rd255; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp657; | |
$L__tmp657: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd256, [%SP+104]; | |
shl.b64 %rd257, %rd254, 3; | |
add.s64 %rd258, %rd256, %rd257; | |
st.u64 [%rd252+40], %rd258; | |
$L__tmp658: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd259, [%SP+136]; | |
setp.ne.s64 %p24, %rd259, 0; | |
not.pred %p25, %p24; | |
not.pred %p26, %p25; | |
@%p26 bra $L__BB6_18; | |
bra.uni $L__BB6_17; | |
$L__BB6_17: | |
mov.u32 %r20, 0; | |
mov.b32 %r21, %r20; | |
bra.uni $L__BB6_18; | |
$L__BB6_18: | |
ld.u64 %rd260, [%SP+144]; | |
ld.u64 %rd261, [%SP+152]; | |
ld.u64 %rd262, [%SP+160]; | |
ld.u64 %rd263, [%SP+168]; | |
ld.u64 %rd264, [%SP+176]; | |
ld.u64 %rd265, [%SP+184]; | |
$L__tmp659: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 32 | |
st.u64 [%SP+1960], %rd265; | |
st.u64 [%SP+1952], %rd264; | |
st.u64 [%SP+1944], %rd263; | |
st.u64 [%SP+1936], %rd262; | |
st.u64 [%SP+1928], %rd261; | |
st.u64 [%SP+1920], %rd260; | |
ld.u64 %rd266, [%SP+1936]; | |
st.u64 [%SP+1712], %rd266; | |
add.u64 %rd267, %SP, 1712; | |
mov.b64 %rd268, %rd267; | |
st.u64 [%SP+192], %rd268; | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 32 | |
bra.uni $L__tmp660; | |
$L__tmp660: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
ld.u64 %rd269, [%SP+192]; | |
setp.ne.s64 %p27, %rd269, 0; | |
not.pred %p28, %p27; | |
not.pred %p29, %p28; | |
@%p29 bra $L__BB6_20; | |
bra.uni $L__BB6_19; | |
$L__BB6_19: | |
mov.u32 %r22, 0; | |
mov.b32 %r23, %r22; | |
bra.uni $L__BB6_20; | |
$L__BB6_20: | |
ld.u64 %rd270, [%SP+192]; | |
ld.u64 %rd271, [%rd270]; | |
mov.b64 %rd272, %rd271; | |
$L__tmp661: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 32 | |
ld.f64 %fd3, [%rd272]; | |
add.u64 %rd273, %SP, 1600; | |
mov.b64 %rd274, %rd273; | |
st.u64 [%SP+232], %rd274; | |
mov.b64 %rd5, %rd1; | |
$L__tmp662: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 41 | |
bra.uni $L__tmp663; | |
$L__tmp663: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd275, [%SP+232]; | |
mov.b64 %rd276, %rd275; | |
st.u64 [%SP+224], %rd276; | |
mov.b64 %rd277, %rd5; | |
$L__tmp664: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp665; | |
$L__tmp665: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd278, [%SP+224]; | |
setp.ne.s64 %p30, %rd278, 0; | |
not.pred %p31, %p30; | |
not.pred %p32, %p31; | |
@%p32 bra $L__BB6_22; | |
bra.uni $L__BB6_21; | |
$L__BB6_21: | |
mov.u32 %r24, 0; | |
mov.b32 %r25, %r24; | |
bra.uni $L__BB6_22; | |
$L__tmp666: | |
$L__BB6_22: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd279, [%SP+232]; | |
ld.u64 %rd280, [%rd279+24]; | |
ld.u64 %rd281, [%SP+232]; | |
ld.u64 %rd282, [%rd281+32]; | |
ld.u64 %rd283, [%SP+232]; | |
ld.u64 %rd284, [%rd283+40]; | |
ld.u64 %rd285, [%SP+232]; | |
ld.u64 %rd286, [%rd285+48]; | |
ld.u64 %rd287, [%SP+232]; | |
ld.u64 %rd288, [%rd287+56]; | |
ld.u64 %rd289, [%SP+232]; | |
ld.u64 %rd290, [%rd289+64]; | |
add.u64 %rd291, %SP, 240; | |
mov.b64 %rd292, %rd291; | |
st.u64 [%SP+208], %rd292; | |
mov.b64 %rd293, %rd5; | |
$L__tmp667: | |
.loc 11 0 6996 | |
mov.b64 %rd294, %rd280; | |
$L__tmp668: | |
mov.b64 %rd295, %rd282; | |
$L__tmp669: | |
mov.b64 %rd296, %rd284; | |
$L__tmp670: | |
mov.b64 %rd297, %rd286; | |
$L__tmp671: | |
mov.b64 %rd298, %rd288; | |
$L__tmp672: | |
mov.b64 %rd299, %rd290; | |
st.u64 [%SP+216], %rd299; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp673; | |
$L__tmp673: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd300, [%SP+208]; | |
$L__tmp674: | |
.loc 11 0 5739 | |
mov.b64 %rd301, %rd293; | |
$L__tmp675: | |
mov.b64 %rd302, %rd294; | |
$L__tmp676: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp677; | |
$L__tmp677: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd303, %rd301, 3; | |
add.s64 %rd304, %rd302, %rd303; | |
st.u64 [%rd300], %rd304; | |
$L__tmp678: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd305, [%SP+208]; | |
mov.b64 %rd306, %rd293; | |
$L__tmp679: | |
.loc 11 0 5752 | |
mov.b64 %rd307, %rd295; | |
$L__tmp680: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp681; | |
$L__tmp681: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd308, %rd306, 3; | |
add.s64 %rd309, %rd307, %rd308; | |
st.u64 [%rd305+8], %rd309; | |
$L__tmp682: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd310, [%SP+208]; | |
mov.b64 %rd311, %rd293; | |
$L__tmp683: | |
.loc 11 0 5765 | |
mov.b64 %rd312, %rd296; | |
$L__tmp684: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp685; | |
$L__tmp685: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd313, %rd311, 3; | |
add.s64 %rd314, %rd312, %rd313; | |
st.u64 [%rd310+16], %rd314; | |
$L__tmp686: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd315, [%SP+208]; | |
mov.b64 %rd316, %rd293; | |
$L__tmp687: | |
.loc 11 0 5778 | |
mov.b64 %rd317, %rd297; | |
$L__tmp688: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp689; | |
$L__tmp689: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd318, %rd316, 1; | |
add.s64 %rd319, %rd317, %rd318; | |
st.u64 [%rd315+24], %rd319; | |
$L__tmp690: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd320, [%SP+208]; | |
mov.b64 %rd321, %rd293; | |
$L__tmp691: | |
.loc 11 0 5801 | |
mov.b64 %rd322, %rd298; | |
$L__tmp692: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp693; | |
$L__tmp693: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd323, %rd321, 2; | |
add.s64 %rd324, %rd322, %rd323; | |
st.u64 [%rd320+32], %rd324; | |
$L__tmp694: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd325, [%SP+208]; | |
ld.u64 %rd326, [%SP+216]; | |
mov.b64 %rd327, %rd293; | |
$L__tmp695: | |
.loc 11 0 5822 | |
mov.b64 %rd328, %rd326; | |
st.u64 [%SP+200], %rd328; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp696; | |
$L__tmp696: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd329, [%SP+200]; | |
shl.b64 %rd330, %rd327, 3; | |
add.s64 %rd331, %rd329, %rd330; | |
st.u64 [%rd325+40], %rd331; | |
$L__tmp697: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd332, [%SP+232]; | |
setp.ne.s64 %p33, %rd332, 0; | |
not.pred %p34, %p33; | |
not.pred %p35, %p34; | |
@%p35 bra $L__BB6_24; | |
bra.uni $L__BB6_23; | |
$L__BB6_23: | |
mov.u32 %r26, 0; | |
mov.b32 %r27, %r26; | |
bra.uni $L__BB6_24; | |
$L__BB6_24: | |
ld.u64 %rd333, [%SP+240]; | |
ld.u64 %rd334, [%SP+248]; | |
ld.u64 %rd335, [%SP+256]; | |
ld.u64 %rd336, [%SP+264]; | |
ld.u64 %rd337, [%SP+272]; | |
ld.u64 %rd338, [%SP+280]; | |
$L__tmp698: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 41 | |
st.u64 [%SP+2008], %rd338; | |
st.u64 [%SP+2000], %rd337; | |
st.u64 [%SP+1992], %rd336; | |
st.u64 [%SP+1984], %rd335; | |
st.u64 [%SP+1976], %rd334; | |
st.u64 [%SP+1968], %rd333; | |
ld.u64 %rd339, [%SP+1976]; | |
st.u64 [%SP+1720], %rd339; | |
add.u64 %rd340, %SP, 1720; | |
mov.b64 %rd341, %rd340; | |
st.u64 [%SP+288], %rd341; | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 41 | |
bra.uni $L__tmp699; | |
$L__tmp699: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
ld.u64 %rd342, [%SP+288]; | |
setp.ne.s64 %p36, %rd342, 0; | |
not.pred %p37, %p36; | |
not.pred %p38, %p37; | |
@%p38 bra $L__BB6_26; | |
bra.uni $L__BB6_25; | |
$L__BB6_25: | |
mov.u32 %r28, 0; | |
mov.b32 %r29, %r28; | |
bra.uni $L__BB6_26; | |
$L__BB6_26: | |
ld.u64 %rd343, [%SP+288]; | |
ld.u64 %rd344, [%rd343]; | |
mov.b64 %rd345, %rd344; | |
$L__tmp700: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 41 | |
ld.f64 %fd11, [%rd345]; | |
mul.f64 %fd12, %fd3, %fd11; | |
sub.f64 %fd13, %fd2, %fd12; | |
st.f64 [%SP+1688], %fd13; | |
add.u64 %rd346, %SP, 1688; | |
mov.b64 %rd6, %rd346; | |
add.u64 %rd347, %SP, 1440; | |
mov.b64 %rd348, %rd347; | |
st.u64 [%SP+328], %rd348; | |
mov.b64 %rd7, %rd1; | |
$L__tmp701: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 5 | |
bra.uni $L__tmp702; | |
$L__tmp702: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6763 | |
ld.u64 %rd349, [%SP+328]; | |
mov.b64 %rd350, %rd349; | |
st.u64 [%SP+320], %rd350; | |
mov.b64 %rd351, %rd7; | |
$L__tmp703: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6763 | |
bra.uni $L__tmp704; | |
$L__tmp704: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd352, [%SP+320]; | |
setp.ne.s64 %p39, %rd352, 0; | |
not.pred %p40, %p39; | |
not.pred %p41, %p40; | |
@%p41 bra $L__BB6_28; | |
bra.uni $L__BB6_27; | |
$L__BB6_27: | |
mov.u32 %r30, 0; | |
mov.b32 %r31, %r30; | |
bra.uni $L__BB6_28; | |
$L__tmp705: | |
$L__BB6_28: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6782 | |
ld.u64 %rd353, [%SP+328]; | |
ld.u64 %rd354, [%rd353+24]; | |
ld.u64 %rd355, [%SP+328]; | |
ld.u64 %rd356, [%rd355+32]; | |
ld.u64 %rd357, [%SP+328]; | |
ld.u64 %rd358, [%rd357+40]; | |
ld.u64 %rd359, [%SP+328]; | |
ld.u64 %rd360, [%rd359+48]; | |
ld.u64 %rd361, [%SP+328]; | |
ld.u64 %rd362, [%rd361+56]; | |
ld.u64 %rd363, [%SP+328]; | |
ld.u64 %rd364, [%rd363+64]; | |
add.u64 %rd365, %SP, 336; | |
mov.b64 %rd366, %rd365; | |
st.u64 [%SP+304], %rd366; | |
mov.b64 %rd367, %rd7; | |
$L__tmp706: | |
.loc 11 0 6782 | |
mov.b64 %rd368, %rd354; | |
$L__tmp707: | |
mov.b64 %rd369, %rd356; | |
$L__tmp708: | |
mov.b64 %rd370, %rd358; | |
$L__tmp709: | |
mov.b64 %rd371, %rd360; | |
$L__tmp710: | |
mov.b64 %rd372, %rd362; | |
$L__tmp711: | |
mov.b64 %rd373, %rd364; | |
st.u64 [%SP+312], %rd373; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6782 | |
bra.uni $L__tmp712; | |
$L__tmp712: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd374, [%SP+304]; | |
$L__tmp713: | |
.loc 11 0 5739 | |
mov.b64 %rd375, %rd367; | |
$L__tmp714: | |
mov.b64 %rd376, %rd368; | |
$L__tmp715: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp716; | |
$L__tmp716: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd377, %rd375, 3; | |
add.s64 %rd378, %rd376, %rd377; | |
st.u64 [%rd374], %rd378; | |
$L__tmp717: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd379, [%SP+304]; | |
mov.b64 %rd380, %rd367; | |
$L__tmp718: | |
.loc 11 0 5752 | |
mov.b64 %rd381, %rd369; | |
$L__tmp719: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp720; | |
$L__tmp720: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd382, %rd380, 3; | |
add.s64 %rd383, %rd381, %rd382; | |
st.u64 [%rd379+8], %rd383; | |
$L__tmp721: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd384, [%SP+304]; | |
mov.b64 %rd385, %rd367; | |
$L__tmp722: | |
.loc 11 0 5765 | |
mov.b64 %rd386, %rd370; | |
$L__tmp723: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp724; | |
$L__tmp724: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd387, %rd385, 3; | |
add.s64 %rd388, %rd386, %rd387; | |
st.u64 [%rd384+16], %rd388; | |
$L__tmp725: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd389, [%SP+304]; | |
mov.b64 %rd390, %rd367; | |
$L__tmp726: | |
.loc 11 0 5778 | |
mov.b64 %rd391, %rd371; | |
$L__tmp727: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp728; | |
$L__tmp728: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd392, %rd390, 1; | |
add.s64 %rd393, %rd391, %rd392; | |
st.u64 [%rd389+24], %rd393; | |
$L__tmp729: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd394, [%SP+304]; | |
mov.b64 %rd395, %rd367; | |
$L__tmp730: | |
.loc 11 0 5801 | |
mov.b64 %rd396, %rd372; | |
$L__tmp731: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp732; | |
$L__tmp732: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd397, %rd395, 2; | |
add.s64 %rd398, %rd396, %rd397; | |
st.u64 [%rd394+32], %rd398; | |
$L__tmp733: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd399, [%SP+304]; | |
ld.u64 %rd400, [%SP+312]; | |
mov.b64 %rd401, %rd367; | |
$L__tmp734: | |
.loc 11 0 5822 | |
mov.b64 %rd402, %rd400; | |
st.u64 [%SP+296], %rd402; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp735; | |
$L__tmp735: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd403, [%SP+296]; | |
shl.b64 %rd404, %rd401, 3; | |
add.s64 %rd405, %rd403, %rd404; | |
st.u64 [%rd399+40], %rd405; | |
$L__tmp736: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6782 | |
ld.u64 %rd406, [%SP+328]; | |
setp.ne.s64 %p42, %rd406, 0; | |
not.pred %p43, %p42; | |
not.pred %p44, %p43; | |
@%p44 bra $L__BB6_30; | |
bra.uni $L__BB6_29; | |
$L__BB6_29: | |
mov.u32 %r32, 0; | |
mov.b32 %r33, %r32; | |
bra.uni $L__BB6_30; | |
$L__BB6_30: | |
ld.u64 %rd407, [%SP+336]; | |
ld.u64 %rd408, [%SP+344]; | |
ld.u64 %rd409, [%SP+352]; | |
ld.u64 %rd410, [%SP+360]; | |
ld.u64 %rd411, [%SP+368]; | |
ld.u64 %rd412, [%SP+376]; | |
$L__tmp737: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 5 | |
st.u64 [%SP+2056], %rd412; | |
st.u64 [%SP+2048], %rd411; | |
st.u64 [%SP+2040], %rd410; | |
st.u64 [%SP+2032], %rd409; | |
st.u64 [%SP+2024], %rd408; | |
st.u64 [%SP+2016], %rd407; | |
ld.u64 %rd413, [%SP+2016]; | |
st.u64 [%SP+1680], %rd413; | |
add.u64 %rd414, %SP, 1680; | |
mov.b64 %rd415, %rd414; | |
st.u64 [%SP+384], %rd415; | |
mov.b64 %rd8, %rd6; | |
$L__tmp738: | |
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y; | |
.loc 18 70 5 | |
bra.uni $L__tmp739; | |
$L__tmp739: | |
//soa_v7.h:35 SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return val_ = v; } | |
.loc 12 35 120 | |
ld.u64 %rd416, [%SP+384]; | |
setp.ne.s64 %p45, %rd416, 0; | |
not.pred %p46, %p45; | |
not.pred %p47, %p46; | |
@%p47 bra $L__BB6_32; | |
bra.uni $L__BB6_31; | |
$L__BB6_31: | |
mov.u32 %r34, 0; | |
mov.b32 %r35, %r34; | |
bra.uni $L__BB6_32; | |
$L__BB6_32: | |
ld.f64 %fd14, [%rd8]; | |
ld.u64 %rd417, [%SP+384]; | |
ld.u64 %rd418, [%rd417]; | |
mov.b64 %rd419, %rd418; | |
st.f64 [%rd419], %fd14; | |
mov.b64 %rd420, %rd419; | |
add.u64 %rd421, %SP, 1520; | |
mov.b64 %rd422, %rd421; | |
st.u64 [%SP+424], %rd422; | |
mov.b64 %rd9, %rd1; | |
$L__tmp740: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 14 | |
bra.uni $L__tmp741; | |
$L__tmp741: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd423, [%SP+424]; | |
mov.b64 %rd424, %rd423; | |
st.u64 [%SP+416], %rd424; | |
mov.b64 %rd425, %rd9; | |
$L__tmp742: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp743; | |
$L__tmp743: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd426, [%SP+416]; | |
setp.ne.s64 %p48, %rd426, 0; | |
not.pred %p49, %p48; | |
not.pred %p50, %p49; | |
@%p50 bra $L__BB6_34; | |
bra.uni $L__BB6_33; | |
$L__BB6_33: | |
mov.u32 %r36, 0; | |
mov.b32 %r37, %r36; | |
bra.uni $L__BB6_34; | |
$L__tmp744: | |
$L__BB6_34: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd427, [%SP+424]; | |
ld.u64 %rd428, [%rd427+24]; | |
ld.u64 %rd429, [%SP+424]; | |
ld.u64 %rd430, [%rd429+32]; | |
ld.u64 %rd431, [%SP+424]; | |
ld.u64 %rd432, [%rd431+40]; | |
ld.u64 %rd433, [%SP+424]; | |
ld.u64 %rd434, [%rd433+48]; | |
ld.u64 %rd435, [%SP+424]; | |
ld.u64 %rd436, [%rd435+56]; | |
ld.u64 %rd437, [%SP+424]; | |
ld.u64 %rd438, [%rd437+64]; | |
add.u64 %rd439, %SP, 432; | |
mov.b64 %rd440, %rd439; | |
st.u64 [%SP+400], %rd440; | |
mov.b64 %rd441, %rd9; | |
$L__tmp745: | |
.loc 11 0 6996 | |
mov.b64 %rd442, %rd428; | |
$L__tmp746: | |
mov.b64 %rd443, %rd430; | |
$L__tmp747: | |
mov.b64 %rd444, %rd432; | |
$L__tmp748: | |
mov.b64 %rd445, %rd434; | |
$L__tmp749: | |
mov.b64 %rd446, %rd436; | |
$L__tmp750: | |
mov.b64 %rd447, %rd438; | |
st.u64 [%SP+408], %rd447; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp751; | |
$L__tmp751: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd448, [%SP+400]; | |
$L__tmp752: | |
.loc 11 0 5739 | |
mov.b64 %rd449, %rd441; | |
$L__tmp753: | |
mov.b64 %rd450, %rd442; | |
$L__tmp754: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp755; | |
$L__tmp755: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd451, %rd449, 3; | |
add.s64 %rd452, %rd450, %rd451; | |
st.u64 [%rd448], %rd452; | |
$L__tmp756: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd453, [%SP+400]; | |
mov.b64 %rd454, %rd441; | |
$L__tmp757: | |
.loc 11 0 5752 | |
mov.b64 %rd455, %rd443; | |
$L__tmp758: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp759; | |
$L__tmp759: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd456, %rd454, 3; | |
add.s64 %rd457, %rd455, %rd456; | |
st.u64 [%rd453+8], %rd457; | |
$L__tmp760: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd458, [%SP+400]; | |
mov.b64 %rd459, %rd441; | |
$L__tmp761: | |
.loc 11 0 5765 | |
mov.b64 %rd460, %rd444; | |
$L__tmp762: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp763; | |
$L__tmp763: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd461, %rd459, 3; | |
add.s64 %rd462, %rd460, %rd461; | |
st.u64 [%rd458+16], %rd462; | |
$L__tmp764: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd463, [%SP+400]; | |
mov.b64 %rd464, %rd441; | |
$L__tmp765: | |
.loc 11 0 5778 | |
mov.b64 %rd465, %rd445; | |
$L__tmp766: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp767; | |
$L__tmp767: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd466, %rd464, 1; | |
add.s64 %rd467, %rd465, %rd466; | |
st.u64 [%rd463+24], %rd467; | |
$L__tmp768: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd468, [%SP+400]; | |
mov.b64 %rd469, %rd441; | |
$L__tmp769: | |
.loc 11 0 5801 | |
mov.b64 %rd470, %rd446; | |
$L__tmp770: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp771; | |
$L__tmp771: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd471, %rd469, 2; | |
add.s64 %rd472, %rd470, %rd471; | |
st.u64 [%rd468+32], %rd472; | |
$L__tmp772: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd473, [%SP+400]; | |
ld.u64 %rd474, [%SP+408]; | |
mov.b64 %rd475, %rd441; | |
$L__tmp773: | |
.loc 11 0 5822 | |
mov.b64 %rd476, %rd474; | |
st.u64 [%SP+392], %rd476; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp774; | |
$L__tmp774: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd477, [%SP+392]; | |
shl.b64 %rd478, %rd475, 3; | |
add.s64 %rd479, %rd477, %rd478; | |
st.u64 [%rd473+40], %rd479; | |
$L__tmp775: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd480, [%SP+424]; | |
setp.ne.s64 %p51, %rd480, 0; | |
not.pred %p52, %p51; | |
not.pred %p53, %p52; | |
@%p53 bra $L__BB6_36; | |
bra.uni $L__BB6_35; | |
$L__BB6_35: | |
mov.u32 %r38, 0; | |
mov.b32 %r39, %r38; | |
bra.uni $L__BB6_36; | |
$L__BB6_36: | |
ld.u64 %rd481, [%SP+432]; | |
ld.u64 %rd482, [%SP+440]; | |
ld.u64 %rd483, [%SP+448]; | |
ld.u64 %rd484, [%SP+456]; | |
ld.u64 %rd485, [%SP+464]; | |
ld.u64 %rd486, [%SP+472]; | |
$L__tmp776: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 14 | |
st.u64 [%SP+2104], %rd486; | |
st.u64 [%SP+2096], %rd485; | |
st.u64 [%SP+2088], %rd484; | |
st.u64 [%SP+2080], %rd483; | |
st.u64 [%SP+2072], %rd482; | |
st.u64 [%SP+2064], %rd481; | |
ld.u64 %rd487, [%SP+2080]; | |
st.u64 [%SP+1744], %rd487; | |
add.u64 %rd488, %SP, 1744; | |
mov.b64 %rd489, %rd488; | |
st.u64 [%SP+480], %rd489; | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 14 | |
bra.uni $L__tmp777; | |
$L__tmp777: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
ld.u64 %rd490, [%SP+480]; | |
setp.ne.s64 %p54, %rd490, 0; | |
not.pred %p55, %p54; | |
not.pred %p56, %p55; | |
@%p56 bra $L__BB6_38; | |
bra.uni $L__BB6_37; | |
$L__BB6_37: | |
mov.u32 %r40, 0; | |
mov.b32 %r41, %r40; | |
bra.uni $L__BB6_38; | |
$L__BB6_38: | |
ld.u64 %rd491, [%SP+480]; | |
ld.u64 %rd492, [%rd491]; | |
mov.b64 %rd493, %rd492; | |
$L__tmp778: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 14 | |
ld.f64 %fd4, [%rd493]; | |
add.u64 %rd494, %SP, 1600; | |
mov.b64 %rd495, %rd494; | |
st.u64 [%SP+520], %rd495; | |
mov.b64 %rd10, %rd1; | |
$L__tmp779: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 23 | |
bra.uni $L__tmp780; | |
$L__tmp780: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd496, [%SP+520]; | |
mov.b64 %rd497, %rd496; | |
st.u64 [%SP+512], %rd497; | |
mov.b64 %rd498, %rd10; | |
$L__tmp781: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp782; | |
$L__tmp782: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd499, [%SP+512]; | |
setp.ne.s64 %p57, %rd499, 0; | |
not.pred %p58, %p57; | |
not.pred %p59, %p58; | |
@%p59 bra $L__BB6_40; | |
bra.uni $L__BB6_39; | |
$L__BB6_39: | |
mov.u32 %r42, 0; | |
mov.b32 %r43, %r42; | |
bra.uni $L__BB6_40; | |
$L__tmp783: | |
$L__BB6_40: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd500, [%SP+520]; | |
ld.u64 %rd501, [%rd500+24]; | |
ld.u64 %rd502, [%SP+520]; | |
ld.u64 %rd503, [%rd502+32]; | |
ld.u64 %rd504, [%SP+520]; | |
ld.u64 %rd505, [%rd504+40]; | |
ld.u64 %rd506, [%SP+520]; | |
ld.u64 %rd507, [%rd506+48]; | |
ld.u64 %rd508, [%SP+520]; | |
ld.u64 %rd509, [%rd508+56]; | |
ld.u64 %rd510, [%SP+520]; | |
ld.u64 %rd511, [%rd510+64]; | |
add.u64 %rd512, %SP, 528; | |
mov.b64 %rd513, %rd512; | |
st.u64 [%SP+496], %rd513; | |
mov.b64 %rd514, %rd10; | |
$L__tmp784: | |
.loc 11 0 6996 | |
mov.b64 %rd515, %rd501; | |
$L__tmp785: | |
mov.b64 %rd516, %rd503; | |
$L__tmp786: | |
mov.b64 %rd517, %rd505; | |
$L__tmp787: | |
mov.b64 %rd518, %rd507; | |
$L__tmp788: | |
mov.b64 %rd519, %rd509; | |
$L__tmp789: | |
mov.b64 %rd520, %rd511; | |
st.u64 [%SP+504], %rd520; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp790; | |
$L__tmp790: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd521, [%SP+496]; | |
$L__tmp791: | |
.loc 11 0 5739 | |
mov.b64 %rd522, %rd514; | |
$L__tmp792: | |
mov.b64 %rd523, %rd515; | |
$L__tmp793: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp794; | |
$L__tmp794: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd524, %rd522, 3; | |
add.s64 %rd525, %rd523, %rd524; | |
st.u64 [%rd521], %rd525; | |
$L__tmp795: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd526, [%SP+496]; | |
mov.b64 %rd527, %rd514; | |
$L__tmp796: | |
.loc 11 0 5752 | |
mov.b64 %rd528, %rd516; | |
$L__tmp797: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp798; | |
$L__tmp798: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd529, %rd527, 3; | |
add.s64 %rd530, %rd528, %rd529; | |
st.u64 [%rd526+8], %rd530; | |
$L__tmp799: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd531, [%SP+496]; | |
mov.b64 %rd532, %rd514; | |
$L__tmp800: | |
.loc 11 0 5765 | |
mov.b64 %rd533, %rd517; | |
$L__tmp801: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp802; | |
$L__tmp802: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd534, %rd532, 3; | |
add.s64 %rd535, %rd533, %rd534; | |
st.u64 [%rd531+16], %rd535; | |
$L__tmp803: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd536, [%SP+496]; | |
mov.b64 %rd537, %rd514; | |
$L__tmp804: | |
.loc 11 0 5778 | |
mov.b64 %rd538, %rd518; | |
$L__tmp805: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp806; | |
$L__tmp806: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd539, %rd537, 1; | |
add.s64 %rd540, %rd538, %rd539; | |
st.u64 [%rd536+24], %rd540; | |
$L__tmp807: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd541, [%SP+496]; | |
mov.b64 %rd542, %rd514; | |
$L__tmp808: | |
.loc 11 0 5801 | |
mov.b64 %rd543, %rd519; | |
$L__tmp809: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp810; | |
$L__tmp810: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd544, %rd542, 2; | |
add.s64 %rd545, %rd543, %rd544; | |
st.u64 [%rd541+32], %rd545; | |
$L__tmp811: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd546, [%SP+496]; | |
ld.u64 %rd547, [%SP+504]; | |
mov.b64 %rd548, %rd514; | |
$L__tmp812: | |
.loc 11 0 5822 | |
mov.b64 %rd549, %rd547; | |
st.u64 [%SP+488], %rd549; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp813; | |
$L__tmp813: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd550, [%SP+488]; | |
shl.b64 %rd551, %rd548, 3; | |
add.s64 %rd552, %rd550, %rd551; | |
st.u64 [%rd546+40], %rd552; | |
$L__tmp814: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd553, [%SP+520]; | |
setp.ne.s64 %p60, %rd553, 0; | |
not.pred %p61, %p60; | |
not.pred %p62, %p61; | |
@%p62 bra $L__BB6_42; | |
bra.uni $L__BB6_41; | |
$L__BB6_41: | |
mov.u32 %r44, 0; | |
mov.b32 %r45, %r44; | |
bra.uni $L__BB6_42; | |
$L__BB6_42: | |
ld.u64 %rd554, [%SP+528]; | |
ld.u64 %rd555, [%SP+536]; | |
ld.u64 %rd556, [%SP+544]; | |
ld.u64 %rd557, [%SP+552]; | |
ld.u64 %rd558, [%SP+560]; | |
ld.u64 %rd559, [%SP+568]; | |
$L__tmp815: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 23 | |
st.u64 [%SP+2152], %rd559; | |
st.u64 [%SP+2144], %rd558; | |
st.u64 [%SP+2136], %rd557; | |
st.u64 [%SP+2128], %rd556; | |
st.u64 [%SP+2120], %rd555; | |
st.u64 [%SP+2112], %rd554; | |
ld.u64 %rd560, [%SP+2112]; | |
st.u64 [%SP+1752], %rd560; | |
add.u64 %rd561, %SP, 1752; | |
mov.b64 %rd562, %rd561; | |
st.u64 [%SP+576], %rd562; | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 23 | |
bra.uni $L__tmp816; | |
$L__tmp816: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
ld.u64 %rd563, [%SP+576]; | |
setp.ne.s64 %p63, %rd563, 0; | |
not.pred %p64, %p63; | |
not.pred %p65, %p64; | |
@%p65 bra $L__BB6_44; | |
bra.uni $L__BB6_43; | |
$L__BB6_43: | |
mov.u32 %r46, 0; | |
mov.b32 %r47, %r46; | |
bra.uni $L__BB6_44; | |
$L__BB6_44: | |
ld.u64 %rd564, [%SP+576]; | |
ld.u64 %rd565, [%rd564]; | |
mov.b64 %rd566, %rd565; | |
$L__tmp817: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 23 | |
ld.f64 %fd15, [%rd566]; | |
mul.f64 %fd5, %fd4, %fd15; | |
add.u64 %rd567, %SP, 1520; | |
mov.b64 %rd568, %rd567; | |
st.u64 [%SP+616], %rd568; | |
mov.b64 %rd11, %rd1; | |
$L__tmp818: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 32 | |
bra.uni $L__tmp819; | |
$L__tmp819: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd569, [%SP+616]; | |
mov.b64 %rd570, %rd569; | |
st.u64 [%SP+608], %rd570; | |
mov.b64 %rd571, %rd11; | |
$L__tmp820: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp821; | |
$L__tmp821: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd572, [%SP+608]; | |
setp.ne.s64 %p66, %rd572, 0; | |
not.pred %p67, %p66; | |
not.pred %p68, %p67; | |
@%p68 bra $L__BB6_46; | |
bra.uni $L__BB6_45; | |
$L__BB6_45: | |
mov.u32 %r48, 0; | |
mov.b32 %r49, %r48; | |
bra.uni $L__BB6_46; | |
$L__tmp822: | |
$L__BB6_46:
// Inlined SoA element accessor (soa_v7_cuda.h:36, declare_SoA_template).
// Reads six column base pointers from the descriptor whose address is stored
// at [%SP+616] (descriptor offsets +24 .. +64), then fills a six-pointer
// element proxy at %SP+624 .. %SP+664 with "column base + index * element size".
// %rd11 supplies the index; it is assigned before this span.
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
// Debug-build code: the descriptor address is reloaded from the stack before
// every field read instead of being kept in a register.
ld.u64 %rd573, [%SP+616];
ld.u64 %rd574, [%rd573+24];
ld.u64 %rd575, [%SP+616];
ld.u64 %rd576, [%rd575+32];
ld.u64 %rd577, [%SP+616];
ld.u64 %rd578, [%rd577+40];
ld.u64 %rd579, [%SP+616];
ld.u64 %rd580, [%rd579+48];
ld.u64 %rd581, [%SP+616];
ld.u64 %rd582, [%rd581+56];
ld.u64 %rd583, [%SP+616];
ld.u64 %rd584, [%rd583+64];
// [%SP+592] = address of the proxy being built (%SP+624).
add.u64 %rd585, %SP, 624;
mov.b64 %rd586, %rd585;
st.u64 [%SP+592], %rd586;
// %rd587 = index; %rd588 .. %rd593 = the six column bases (plain copies).
mov.b64 %rd587, %rd11;
$L__tmp823:
.loc 11 0 6996
mov.b64 %rd588, %rd574;
$L__tmp824:
mov.b64 %rd589, %rd576;
$L__tmp825:
mov.b64 %rd590, %rd578;
$L__tmp826:
mov.b64 %rd591, %rd580;
$L__tmp827:
mov.b64 %rd592, %rd582;
$L__tmp828:
mov.b64 %rd593, %rd584;
// The sixth column base is spilled to [%SP+600] and reloaded further down.
st.u64 [%SP+600], %rd593;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp829;
$L__tmp829:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
// Proxy slot +0: base %rd588 + index * 8.
ld.u64 %rd594, [%SP+592];
$L__tmp830:
.loc 11 0 5739
mov.b64 %rd595, %rd587;
$L__tmp831:
mov.b64 %rd596, %rd588;
$L__tmp832:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp833;
$L__tmp833:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd597, %rd595, 3;
add.s64 %rd598, %rd596, %rd597;
st.u64 [%rd594], %rd598;
$L__tmp834:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
// Proxy slot +8: base %rd589 + index * 8.
ld.u64 %rd599, [%SP+592];
mov.b64 %rd600, %rd587;
$L__tmp835:
.loc 11 0 5752
mov.b64 %rd601, %rd589;
$L__tmp836:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp837;
$L__tmp837:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd602, %rd600, 3;
add.s64 %rd603, %rd601, %rd602;
st.u64 [%rd599+8], %rd603;
$L__tmp838:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
// Proxy slot +16: base %rd590 + index * 8.
ld.u64 %rd604, [%SP+592];
mov.b64 %rd605, %rd587;
$L__tmp839:
.loc 11 0 5765
mov.b64 %rd606, %rd590;
$L__tmp840:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp841;
$L__tmp841:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd607, %rd605, 3;
add.s64 %rd608, %rd606, %rd607;
st.u64 [%rd604+16], %rd608;
$L__tmp842:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
// Proxy slot +24: base %rd591 + index * 2 (the only 2-byte-wide column).
ld.u64 %rd609, [%SP+592];
mov.b64 %rd610, %rd587;
$L__tmp843:
.loc 11 0 5778
mov.b64 %rd611, %rd591;
$L__tmp844:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp845;
$L__tmp845:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd612, %rd610, 1;
add.s64 %rd613, %rd611, %rd612;
st.u64 [%rd609+24], %rd613;
$L__tmp846:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
// Proxy slot +32: base %rd592 + index * 4 (the only 4-byte-wide column).
ld.u64 %rd614, [%SP+592];
mov.b64 %rd615, %rd587;
$L__tmp847:
.loc 11 0 5801
mov.b64 %rd616, %rd592;
$L__tmp848:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp849;
$L__tmp849:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd617, %rd615, 2;
add.s64 %rd618, %rd616, %rd617;
st.u64 [%rd614+32], %rd618;
$L__tmp850:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
// Proxy slot +40: sixth column base (reloaded from [%SP+600], passed through
// [%SP+584]) + index * 8.
ld.u64 %rd619, [%SP+592];
ld.u64 %rd620, [%SP+600];
mov.b64 %rd621, %rd587;
$L__tmp851:
.loc 11 0 5822
mov.b64 %rd622, %rd620;
st.u64 [%SP+584], %rd622;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp852;
$L__tmp852:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd623, [%SP+584];
shl.b64 %rd624, %rd621, 3;
add.s64 %rd625, %rd623, %rd624;
st.u64 [%rd619+40], %rd625;
$L__tmp853:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
// Null test on the descriptor address. The two not.pred cancel out, so
// %p71 == (%rd626 != 0). Both outcomes reach $L__BB6_48; the zero path only
// writes %r50/%r51, which are not read anywhere in this span.
ld.u64 %rd626, [%SP+616];
setp.ne.s64 %p69, %rd626, 0;
not.pred %p70, %p69;
not.pred %p71, %p70;
@%p71 bra $L__BB6_48;
bra.uni $L__BB6_47;
$L__BB6_47:
mov.u32 %r50, 0;
mov.b32 %r51, %r50;
bra.uni $L__BB6_48;
$L__BB6_48:
// Copies the six-pointer element proxy from %SP+624 .. %SP+664 to
// %SP+2160 .. %SP+2200, selects its slot +0, and fetches the element address
// held there (inlined SoAValue conversion operator, soa_v7.h:31).
// The .loc column (71:32) corresponds to the `a[i].x` operand of source line 71.
ld.u64 %rd627, [%SP+624];
ld.u64 %rd628, [%SP+632];
ld.u64 %rd629, [%SP+640];
ld.u64 %rd630, [%SP+648];
ld.u64 %rd631, [%SP+656];
ld.u64 %rd632, [%SP+664];
$L__tmp854:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 32
st.u64 [%SP+2200], %rd632;
st.u64 [%SP+2192], %rd631;
st.u64 [%SP+2184], %rd630;
st.u64 [%SP+2176], %rd629;
st.u64 [%SP+2168], %rd628;
st.u64 [%SP+2160], %rd627;
// Member selection: only slot +0 of the copy is used. It is placed in a
// one-pointer temporary at %SP+1760, whose address is saved at [%SP+672].
ld.u64 %rd633, [%SP+2160];
st.u64 [%SP+1760], %rd633;
add.u64 %rd634, %SP, 1760;
mov.b64 %rd635, %rd634;
st.u64 [%SP+672], %rd635;
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 32
bra.uni $L__tmp855;
$L__tmp855:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
// Null test on the temporary's address. The two not.pred cancel out, so
// %p74 == (%rd636 != 0). Both outcomes reach $L__BB6_50; the zero path only
// writes %r52/%r53, which are not read anywhere in this span.
ld.u64 %rd636, [%SP+672];
setp.ne.s64 %p72, %rd636, 0;
not.pred %p73, %p72;
not.pred %p74, %p73;
@%p74 bra $L__BB6_50;
bra.uni $L__BB6_49;
$L__BB6_49:
mov.u32 %r52, 0;
mov.b32 %r53, %r52;
bra.uni $L__BB6_50;
$L__BB6_50:
// %rd639 = the pointer stored in the temporary, i.e. the element address that
// the following code loads the f64 operand from.
ld.u64 %rd637, [%SP+672];
ld.u64 %rd638, [%rd637];
mov.b64 %rd639, %rd638;
$L__tmp856: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 32 | |
ld.f64 %fd6, [%rd639]; | |
add.u64 %rd640, %SP, 1600; | |
mov.b64 %rd641, %rd640; | |
st.u64 [%SP+712], %rd641; | |
mov.b64 %rd12, %rd1; | |
$L__tmp857: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 41 | |
bra.uni $L__tmp858; | |
$L__tmp858: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd642, [%SP+712]; | |
mov.b64 %rd643, %rd642; | |
st.u64 [%SP+704], %rd643; | |
mov.b64 %rd644, %rd12; | |
$L__tmp859: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp860; | |
$L__tmp860: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd645, [%SP+704]; | |
setp.ne.s64 %p75, %rd645, 0; | |
not.pred %p76, %p75; | |
not.pred %p77, %p76; | |
@%p77 bra $L__BB6_52; | |
bra.uni $L__BB6_51; | |
$L__BB6_51: | |
mov.u32 %r54, 0; | |
mov.b32 %r55, %r54; | |
bra.uni $L__BB6_52; | |
$L__tmp861: | |
$L__BB6_52: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd646, [%SP+712]; | |
ld.u64 %rd647, [%rd646+24]; | |
ld.u64 %rd648, [%SP+712]; | |
ld.u64 %rd649, [%rd648+32]; | |
ld.u64 %rd650, [%SP+712]; | |
ld.u64 %rd651, [%rd650+40]; | |
ld.u64 %rd652, [%SP+712]; | |
ld.u64 %rd653, [%rd652+48]; | |
ld.u64 %rd654, [%SP+712]; | |
ld.u64 %rd655, [%rd654+56]; | |
ld.u64 %rd656, [%SP+712]; | |
ld.u64 %rd657, [%rd656+64]; | |
add.u64 %rd658, %SP, 720; | |
mov.b64 %rd659, %rd658; | |
st.u64 [%SP+688], %rd659; | |
mov.b64 %rd660, %rd12; | |
$L__tmp862: | |
.loc 11 0 6996 | |
mov.b64 %rd661, %rd647; | |
$L__tmp863: | |
mov.b64 %rd662, %rd649; | |
$L__tmp864: | |
mov.b64 %rd663, %rd651; | |
$L__tmp865: | |
mov.b64 %rd664, %rd653; | |
$L__tmp866: | |
mov.b64 %rd665, %rd655; | |
$L__tmp867: | |
mov.b64 %rd666, %rd657; | |
st.u64 [%SP+696], %rd666; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp868; | |
$L__tmp868: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd667, [%SP+688]; | |
$L__tmp869: | |
.loc 11 0 5739 | |
mov.b64 %rd668, %rd660; | |
$L__tmp870: | |
mov.b64 %rd669, %rd661; | |
$L__tmp871: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp872; | |
$L__tmp872: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd670, %rd668, 3; | |
add.s64 %rd671, %rd669, %rd670; | |
st.u64 [%rd667], %rd671; | |
$L__tmp873: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd672, [%SP+688]; | |
mov.b64 %rd673, %rd660; | |
$L__tmp874: | |
.loc 11 0 5752 | |
mov.b64 %rd674, %rd662; | |
$L__tmp875: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp876; | |
$L__tmp876: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd675, %rd673, 3; | |
add.s64 %rd676, %rd674, %rd675; | |
st.u64 [%rd672+8], %rd676; | |
$L__tmp877: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd677, [%SP+688]; | |
mov.b64 %rd678, %rd660; | |
$L__tmp878: | |
.loc 11 0 5765 | |
mov.b64 %rd679, %rd663; | |
$L__tmp879: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp880; | |
$L__tmp880: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd680, %rd678, 3; | |
add.s64 %rd681, %rd679, %rd680; | |
st.u64 [%rd677+16], %rd681; | |
$L__tmp881: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd682, [%SP+688]; | |
mov.b64 %rd683, %rd660; | |
$L__tmp882: | |
.loc 11 0 5778 | |
mov.b64 %rd684, %rd664; | |
$L__tmp883: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp884; | |
$L__tmp884: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd685, %rd683, 1; | |
add.s64 %rd686, %rd684, %rd685; | |
st.u64 [%rd682+24], %rd686; | |
$L__tmp885: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd687, [%SP+688]; | |
mov.b64 %rd688, %rd660; | |
$L__tmp886: | |
.loc 11 0 5801 | |
mov.b64 %rd689, %rd665; | |
$L__tmp887: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp888; | |
$L__tmp888: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd690, %rd688, 2; | |
add.s64 %rd691, %rd689, %rd690; | |
st.u64 [%rd687+32], %rd691; | |
$L__tmp889: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd692, [%SP+688]; | |
ld.u64 %rd693, [%SP+696]; | |
mov.b64 %rd694, %rd660; | |
$L__tmp890: | |
.loc 11 0 5822 | |
mov.b64 %rd695, %rd693; | |
st.u64 [%SP+680], %rd695; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp891; | |
$L__tmp891: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd696, [%SP+680]; | |
shl.b64 %rd697, %rd694, 3; | |
add.s64 %rd698, %rd696, %rd697; | |
st.u64 [%rd692+40], %rd698; | |
$L__tmp892: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd699, [%SP+712]; | |
setp.ne.s64 %p78, %rd699, 0; | |
not.pred %p79, %p78; | |
not.pred %p80, %p79; | |
@%p80 bra $L__BB6_54; | |
bra.uni $L__BB6_53; | |
$L__BB6_53: | |
mov.u32 %r56, 0; | |
mov.b32 %r57, %r56; | |
bra.uni $L__BB6_54; | |
$L__BB6_54: | |
ld.u64 %rd700, [%SP+720]; | |
ld.u64 %rd701, [%SP+728]; | |
ld.u64 %rd702, [%SP+736]; | |
ld.u64 %rd703, [%SP+744]; | |
ld.u64 %rd704, [%SP+752]; | |
ld.u64 %rd705, [%SP+760]; | |
$L__tmp893: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 41 | |
st.u64 [%SP+2248], %rd705; | |
st.u64 [%SP+2240], %rd704; | |
st.u64 [%SP+2232], %rd703; | |
st.u64 [%SP+2224], %rd702; | |
st.u64 [%SP+2216], %rd701; | |
st.u64 [%SP+2208], %rd700; | |
ld.u64 %rd706, [%SP+2224]; | |
st.u64 [%SP+1768], %rd706; | |
add.u64 %rd707, %SP, 1768; | |
mov.b64 %rd708, %rd707; | |
st.u64 [%SP+768], %rd708; | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 41 | |
bra.uni $L__tmp894; | |
$L__tmp894: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
ld.u64 %rd709, [%SP+768]; | |
setp.ne.s64 %p81, %rd709, 0; | |
not.pred %p82, %p81; | |
not.pred %p83, %p82; | |
@%p83 bra $L__BB6_56; | |
bra.uni $L__BB6_55; | |
$L__BB6_55: | |
mov.u32 %r58, 0; | |
mov.b32 %r59, %r58; | |
bra.uni $L__BB6_56; | |
$L__BB6_56:
// Finishes the right-hand side of source line 71 and prepares the accessor
// call for its left-hand side.
// %rd712 = element address held in the one-pointer temporary whose address is
// stored at [%SP+768].
ld.u64 %rd710, [%SP+768];
ld.u64 %rd711, [%rd710];
mov.b64 %rd712, %rd711;
$L__tmp895:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 41
// %fd16: operand at column 41 of line 71 (`b[i].z`).
// %fd6 was loaded earlier under .loc 71:32 (`a[i].x`).
// %fd5 is produced before the visible part of the file; presumably it holds
// a[i].z * b[i].x. NOTE(review): confirm against the preceding code.
ld.f64 %fd16, [%rd712];
mul.f64 %fd17, %fd6, %fd16;
sub.f64 %fd18, %fd5, %fd17;
// The result is spilled to %SP+1736; %rd13 keeps that address for the
// assignment that follows.
st.f64 [%SP+1736], %fd18;
add.u64 %rd713, %SP, 1736;
mov.b64 %rd13, %rd713;
// Arguments for the next inlined accessor: descriptor address %SP+1440 saved at
// [%SP+808], index copied from %rd1 into %rd14. Given the following .loc 71:5
// this is presumably `r[i]`. NOTE(review): %rd1 is set outside this span.
add.u64 %rd714, %SP, 1440;
mov.b64 %rd715, %rd714;
st.u64 [%SP+808], %rd715;
mov.b64 %rd14, %rd1;
$L__tmp896: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 5 | |
bra.uni $L__tmp897; | |
$L__tmp897: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6763 | |
ld.u64 %rd716, [%SP+808]; | |
mov.b64 %rd717, %rd716; | |
st.u64 [%SP+800], %rd717; | |
mov.b64 %rd718, %rd14; | |
$L__tmp898: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6763 | |
bra.uni $L__tmp899; | |
$L__tmp899: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd719, [%SP+800]; | |
setp.ne.s64 %p84, %rd719, 0; | |
not.pred %p85, %p84; | |
not.pred %p86, %p85; | |
@%p86 bra $L__BB6_58; | |
bra.uni $L__BB6_57; | |
$L__BB6_57: | |
mov.u32 %r60, 0; | |
mov.b32 %r61, %r60; | |
bra.uni $L__BB6_58; | |
$L__tmp900: | |
$L__BB6_58: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6782 | |
ld.u64 %rd720, [%SP+808]; | |
ld.u64 %rd721, [%rd720+24]; | |
ld.u64 %rd722, [%SP+808]; | |
ld.u64 %rd723, [%rd722+32]; | |
ld.u64 %rd724, [%SP+808]; | |
ld.u64 %rd725, [%rd724+40]; | |
ld.u64 %rd726, [%SP+808]; | |
ld.u64 %rd727, [%rd726+48]; | |
ld.u64 %rd728, [%SP+808]; | |
ld.u64 %rd729, [%rd728+56]; | |
ld.u64 %rd730, [%SP+808]; | |
ld.u64 %rd731, [%rd730+64]; | |
add.u64 %rd732, %SP, 816; | |
mov.b64 %rd733, %rd732; | |
st.u64 [%SP+784], %rd733; | |
mov.b64 %rd734, %rd14; | |
$L__tmp901: | |
.loc 11 0 6782 | |
mov.b64 %rd735, %rd721; | |
$L__tmp902: | |
mov.b64 %rd736, %rd723; | |
$L__tmp903: | |
mov.b64 %rd737, %rd725; | |
$L__tmp904: | |
mov.b64 %rd738, %rd727; | |
$L__tmp905: | |
mov.b64 %rd739, %rd729; | |
$L__tmp906: | |
mov.b64 %rd740, %rd731; | |
st.u64 [%SP+792], %rd740; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6782 | |
bra.uni $L__tmp907; | |
$L__tmp907: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd741, [%SP+784]; | |
$L__tmp908: | |
.loc 11 0 5739 | |
mov.b64 %rd742, %rd734; | |
$L__tmp909: | |
mov.b64 %rd743, %rd735; | |
$L__tmp910: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp911; | |
$L__tmp911: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd744, %rd742, 3; | |
add.s64 %rd745, %rd743, %rd744; | |
st.u64 [%rd741], %rd745; | |
$L__tmp912: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd746, [%SP+784]; | |
mov.b64 %rd747, %rd734; | |
$L__tmp913: | |
.loc 11 0 5752 | |
mov.b64 %rd748, %rd736; | |
$L__tmp914: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp915; | |
$L__tmp915: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd749, %rd747, 3; | |
add.s64 %rd750, %rd748, %rd749; | |
st.u64 [%rd746+8], %rd750; | |
$L__tmp916: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd751, [%SP+784]; | |
mov.b64 %rd752, %rd734; | |
$L__tmp917: | |
.loc 11 0 5765 | |
mov.b64 %rd753, %rd737; | |
$L__tmp918: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp919; | |
$L__tmp919: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd754, %rd752, 3; | |
add.s64 %rd755, %rd753, %rd754; | |
st.u64 [%rd751+16], %rd755; | |
$L__tmp920: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd756, [%SP+784]; | |
mov.b64 %rd757, %rd734; | |
$L__tmp921: | |
.loc 11 0 5778 | |
mov.b64 %rd758, %rd738; | |
$L__tmp922: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp923; | |
$L__tmp923: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd759, %rd757, 1; | |
add.s64 %rd760, %rd758, %rd759; | |
st.u64 [%rd756+24], %rd760; | |
$L__tmp924: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd761, [%SP+784]; | |
mov.b64 %rd762, %rd734; | |
$L__tmp925: | |
.loc 11 0 5801 | |
mov.b64 %rd763, %rd739; | |
$L__tmp926: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp927; | |
$L__tmp927: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd764, %rd762, 2; | |
add.s64 %rd765, %rd763, %rd764; | |
st.u64 [%rd761+32], %rd765; | |
$L__tmp928: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd766, [%SP+784]; | |
ld.u64 %rd767, [%SP+792]; | |
mov.b64 %rd768, %rd734; | |
$L__tmp929: | |
.loc 11 0 5822 | |
mov.b64 %rd769, %rd767; | |
st.u64 [%SP+776], %rd769; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp930; | |
$L__tmp930: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd770, [%SP+776]; | |
shl.b64 %rd771, %rd768, 3; | |
add.s64 %rd772, %rd770, %rd771; | |
st.u64 [%rd766+40], %rd772; | |
$L__tmp931: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6782 | |
ld.u64 %rd773, [%SP+808]; | |
setp.ne.s64 %p87, %rd773, 0; | |
not.pred %p88, %p87; | |
not.pred %p89, %p88; | |
@%p89 bra $L__BB6_60; | |
bra.uni $L__BB6_59; | |
$L__BB6_59: | |
mov.u32 %r62, 0; | |
mov.b32 %r63, %r62; | |
bra.uni $L__BB6_60; | |
$L__BB6_60: | |
ld.u64 %rd774, [%SP+816]; | |
ld.u64 %rd775, [%SP+824]; | |
ld.u64 %rd776, [%SP+832]; | |
ld.u64 %rd777, [%SP+840]; | |
ld.u64 %rd778, [%SP+848]; | |
ld.u64 %rd779, [%SP+856]; | |
$L__tmp932: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 5 | |
st.u64 [%SP+2296], %rd779; | |
st.u64 [%SP+2288], %rd778; | |
st.u64 [%SP+2280], %rd777; | |
st.u64 [%SP+2272], %rd776; | |
st.u64 [%SP+2264], %rd775; | |
st.u64 [%SP+2256], %rd774; | |
ld.u64 %rd780, [%SP+2264]; | |
st.u64 [%SP+1728], %rd780; | |
add.u64 %rd781, %SP, 1728; | |
mov.b64 %rd782, %rd781; | |
st.u64 [%SP+864], %rd782; | |
mov.b64 %rd15, %rd13; | |
$L__tmp933: | |
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z; | |
.loc 18 71 5 | |
bra.uni $L__tmp934; | |
$L__tmp934:
// Inlined SoAValue assignment (soa_v7.h:35): writes the f64 read through %rd15
// to the element address held in the temporary whose address is at [%SP+864],
// then prepares the first accessor call of source line 72.
//soa_v7.h:35 SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return val_ = v; }
.loc 12 35 120
// Null test on the temporary's address. The two not.pred cancel out, so
// %p92 == (%rd783 != 0). Both outcomes reach $L__BB6_62; the zero path only
// writes %r64/%r65, which are not read anywhere in this span.
ld.u64 %rd783, [%SP+864];
setp.ne.s64 %p90, %rd783, 0;
not.pred %p91, %p90;
not.pred %p92, %p91;
@%p92 bra $L__BB6_62;
bra.uni $L__BB6_61;
$L__BB6_61:
mov.u32 %r64, 0;
mov.b32 %r65, %r64;
bra.uni $L__BB6_62;
$L__BB6_62:
// %rd15 is assigned just before this span (copied from %rd13).
ld.f64 %fd19, [%rd15];
ld.u64 %rd784, [%SP+864];
ld.u64 %rd785, [%rd784];
mov.b64 %rd786, %rd785;
st.f64 [%rd786], %fd19;
// %rd787 keeps the destination address (operator= returns a reference); it is
// not used again within this span.
mov.b64 %rd787, %rd786;
// Arguments for the next inlined accessor: descriptor address %SP+1520 saved at
// [%SP+904], index copied from %rd1 into %rd16.
add.u64 %rd788, %SP, 1520;
mov.b64 %rd789, %rd788;
st.u64 [%SP+904], %rd789;
mov.b64 %rd16, %rd1;
$L__tmp935: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 14 | |
bra.uni $L__tmp936; | |
$L__tmp936: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd790, [%SP+904]; | |
mov.b64 %rd791, %rd790; | |
st.u64 [%SP+896], %rd791; | |
mov.b64 %rd792, %rd16; | |
$L__tmp937: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp938; | |
$L__tmp938: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd793, [%SP+896]; | |
setp.ne.s64 %p93, %rd793, 0; | |
not.pred %p94, %p93; | |
not.pred %p95, %p94; | |
@%p95 bra $L__BB6_64; | |
bra.uni $L__BB6_63; | |
$L__BB6_63: | |
mov.u32 %r66, 0; | |
mov.b32 %r67, %r66; | |
bra.uni $L__BB6_64; | |
$L__tmp939: | |
$L__BB6_64: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd794, [%SP+904]; | |
ld.u64 %rd795, [%rd794+24]; | |
ld.u64 %rd796, [%SP+904]; | |
ld.u64 %rd797, [%rd796+32]; | |
ld.u64 %rd798, [%SP+904]; | |
ld.u64 %rd799, [%rd798+40]; | |
ld.u64 %rd800, [%SP+904]; | |
ld.u64 %rd801, [%rd800+48]; | |
ld.u64 %rd802, [%SP+904]; | |
ld.u64 %rd803, [%rd802+56]; | |
ld.u64 %rd804, [%SP+904]; | |
ld.u64 %rd805, [%rd804+64]; | |
add.u64 %rd806, %SP, 912; | |
mov.b64 %rd807, %rd806; | |
st.u64 [%SP+880], %rd807; | |
mov.b64 %rd808, %rd16; | |
$L__tmp940: | |
.loc 11 0 6996 | |
mov.b64 %rd809, %rd795; | |
$L__tmp941: | |
mov.b64 %rd810, %rd797; | |
$L__tmp942: | |
mov.b64 %rd811, %rd799; | |
$L__tmp943: | |
mov.b64 %rd812, %rd801; | |
$L__tmp944: | |
mov.b64 %rd813, %rd803; | |
$L__tmp945: | |
mov.b64 %rd814, %rd805; | |
st.u64 [%SP+888], %rd814; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp946; | |
$L__tmp946: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd815, [%SP+880]; | |
$L__tmp947: | |
.loc 11 0 5739 | |
mov.b64 %rd816, %rd808; | |
$L__tmp948: | |
mov.b64 %rd817, %rd809; | |
$L__tmp949: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp950; | |
$L__tmp950: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd818, %rd816, 3; | |
add.s64 %rd819, %rd817, %rd818; | |
st.u64 [%rd815], %rd819; | |
$L__tmp951: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd820, [%SP+880]; | |
mov.b64 %rd821, %rd808; | |
$L__tmp952: | |
.loc 11 0 5752 | |
mov.b64 %rd822, %rd810; | |
$L__tmp953: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp954; | |
$L__tmp954: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd823, %rd821, 3; | |
add.s64 %rd824, %rd822, %rd823; | |
st.u64 [%rd820+8], %rd824; | |
$L__tmp955: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd825, [%SP+880]; | |
mov.b64 %rd826, %rd808; | |
$L__tmp956: | |
.loc 11 0 5765 | |
mov.b64 %rd827, %rd811; | |
$L__tmp957: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp958; | |
$L__tmp958: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd828, %rd826, 3; | |
add.s64 %rd829, %rd827, %rd828; | |
st.u64 [%rd825+16], %rd829; | |
$L__tmp959: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd830, [%SP+880]; | |
mov.b64 %rd831, %rd808; | |
$L__tmp960: | |
.loc 11 0 5778 | |
mov.b64 %rd832, %rd812; | |
$L__tmp961: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp962; | |
$L__tmp962: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd833, %rd831, 1; | |
add.s64 %rd834, %rd832, %rd833; | |
st.u64 [%rd830+24], %rd834; | |
$L__tmp963: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd835, [%SP+880]; | |
mov.b64 %rd836, %rd808; | |
$L__tmp964: | |
.loc 11 0 5801 | |
mov.b64 %rd837, %rd813; | |
$L__tmp965: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp966; | |
$L__tmp966: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd838, %rd836, 2; | |
add.s64 %rd839, %rd837, %rd838; | |
st.u64 [%rd835+32], %rd839; | |
$L__tmp967: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd840, [%SP+880]; | |
ld.u64 %rd841, [%SP+888]; | |
mov.b64 %rd842, %rd808; | |
$L__tmp968: | |
.loc 11 0 5822 | |
mov.b64 %rd843, %rd841; | |
st.u64 [%SP+872], %rd843; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp969; | |
$L__tmp969: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd844, [%SP+872]; | |
shl.b64 %rd845, %rd842, 3; | |
add.s64 %rd846, %rd844, %rd845; | |
st.u64 [%rd840+40], %rd846; | |
$L__tmp970: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd847, [%SP+904]; | |
setp.ne.s64 %p96, %rd847, 0; | |
not.pred %p97, %p96; | |
not.pred %p98, %p97; | |
@%p98 bra $L__BB6_66; | |
bra.uni $L__BB6_65; | |
$L__BB6_65: | |
mov.u32 %r68, 0; | |
mov.b32 %r69, %r68; | |
bra.uni $L__BB6_66; | |
$L__BB6_66: | |
ld.u64 %rd848, [%SP+912]; | |
ld.u64 %rd849, [%SP+920]; | |
ld.u64 %rd850, [%SP+928]; | |
ld.u64 %rd851, [%SP+936]; | |
ld.u64 %rd852, [%SP+944]; | |
ld.u64 %rd853, [%SP+952]; | |
$L__tmp971: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 14 | |
st.u64 [%SP+2344], %rd853; | |
st.u64 [%SP+2336], %rd852; | |
st.u64 [%SP+2328], %rd851; | |
st.u64 [%SP+2320], %rd850; | |
st.u64 [%SP+2312], %rd849; | |
st.u64 [%SP+2304], %rd848; | |
ld.u64 %rd854, [%SP+2304]; | |
st.u64 [%SP+1792], %rd854; | |
add.u64 %rd855, %SP, 1792; | |
mov.b64 %rd856, %rd855; | |
st.u64 [%SP+960], %rd856; | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 14 | |
bra.uni $L__tmp972; | |
$L__tmp972: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
ld.u64 %rd857, [%SP+960]; | |
setp.ne.s64 %p99, %rd857, 0; | |
not.pred %p100, %p99; | |
not.pred %p101, %p100; | |
@%p101 bra $L__BB6_68; | |
bra.uni $L__BB6_67; | |
$L__BB6_67: | |
mov.u32 %r70, 0; | |
mov.b32 %r71, %r70; | |
bra.uni $L__BB6_68; | |
$L__BB6_68: | |
ld.u64 %rd858, [%SP+960]; | |
ld.u64 %rd859, [%rd858]; | |
mov.b64 %rd860, %rd859; | |
$L__tmp973: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 14 | |
ld.f64 %fd7, [%rd860]; | |
add.u64 %rd861, %SP, 1600; | |
mov.b64 %rd862, %rd861; | |
st.u64 [%SP+1000], %rd862; | |
mov.b64 %rd17, %rd1; | |
$L__tmp974: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 23 | |
bra.uni $L__tmp975; | |
$L__tmp975: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd863, [%SP+1000]; | |
mov.b64 %rd864, %rd863; | |
st.u64 [%SP+992], %rd864; | |
mov.b64 %rd865, %rd17; | |
$L__tmp976: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp977; | |
$L__tmp977: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd866, [%SP+992]; | |
setp.ne.s64 %p102, %rd866, 0; | |
not.pred %p103, %p102; | |
not.pred %p104, %p103; | |
@%p104 bra $L__BB6_70; | |
bra.uni $L__BB6_69; | |
$L__BB6_69: | |
mov.u32 %r72, 0; | |
mov.b32 %r73, %r72; | |
bra.uni $L__BB6_70; | |
$L__tmp978: | |
$L__BB6_70: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd867, [%SP+1000]; | |
ld.u64 %rd868, [%rd867+24]; | |
ld.u64 %rd869, [%SP+1000]; | |
ld.u64 %rd870, [%rd869+32]; | |
ld.u64 %rd871, [%SP+1000]; | |
ld.u64 %rd872, [%rd871+40]; | |
ld.u64 %rd873, [%SP+1000]; | |
ld.u64 %rd874, [%rd873+48]; | |
ld.u64 %rd875, [%SP+1000]; | |
ld.u64 %rd876, [%rd875+56]; | |
ld.u64 %rd877, [%SP+1000]; | |
ld.u64 %rd878, [%rd877+64]; | |
add.u64 %rd879, %SP, 1008; | |
mov.b64 %rd880, %rd879; | |
st.u64 [%SP+976], %rd880; | |
mov.b64 %rd881, %rd17; | |
$L__tmp979: | |
.loc 11 0 6996 | |
mov.b64 %rd882, %rd868; | |
$L__tmp980: | |
mov.b64 %rd883, %rd870; | |
$L__tmp981: | |
mov.b64 %rd884, %rd872; | |
$L__tmp982: | |
mov.b64 %rd885, %rd874; | |
$L__tmp983: | |
mov.b64 %rd886, %rd876; | |
$L__tmp984: | |
mov.b64 %rd887, %rd878; | |
st.u64 [%SP+984], %rd887; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp985; | |
$L__tmp985: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd888, [%SP+976]; | |
$L__tmp986: | |
.loc 11 0 5739 | |
mov.b64 %rd889, %rd881; | |
$L__tmp987: | |
mov.b64 %rd890, %rd882; | |
$L__tmp988: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp989; | |
$L__tmp989: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd891, %rd889, 3; | |
add.s64 %rd892, %rd890, %rd891; | |
st.u64 [%rd888], %rd892; | |
$L__tmp990: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd893, [%SP+976]; | |
mov.b64 %rd894, %rd881; | |
$L__tmp991: | |
.loc 11 0 5752 | |
mov.b64 %rd895, %rd883; | |
$L__tmp992: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp993; | |
$L__tmp993: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd896, %rd894, 3; | |
add.s64 %rd897, %rd895, %rd896; | |
st.u64 [%rd893+8], %rd897; | |
$L__tmp994: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd898, [%SP+976]; | |
mov.b64 %rd899, %rd881; | |
$L__tmp995: | |
.loc 11 0 5765 | |
mov.b64 %rd900, %rd884; | |
$L__tmp996: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp997; | |
$L__tmp997: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd901, %rd899, 3; | |
add.s64 %rd902, %rd900, %rd901; | |
st.u64 [%rd898+16], %rd902; | |
$L__tmp998: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd903, [%SP+976]; | |
mov.b64 %rd904, %rd881; | |
$L__tmp999: | |
.loc 11 0 5778 | |
mov.b64 %rd905, %rd885; | |
$L__tmp1000: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp1001; | |
$L__tmp1001: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd906, %rd904, 1; | |
add.s64 %rd907, %rd905, %rd906; | |
st.u64 [%rd903+24], %rd907; | |
$L__tmp1002: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd908, [%SP+976]; | |
mov.b64 %rd909, %rd881; | |
$L__tmp1003: | |
.loc 11 0 5801 | |
mov.b64 %rd910, %rd886; | |
$L__tmp1004: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp1005; | |
$L__tmp1005: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd911, %rd909, 2; | |
add.s64 %rd912, %rd910, %rd911; | |
st.u64 [%rd908+32], %rd912; | |
$L__tmp1006: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd913, [%SP+976]; | |
ld.u64 %rd914, [%SP+984]; | |
mov.b64 %rd915, %rd881; | |
$L__tmp1007: | |
.loc 11 0 5822 | |
mov.b64 %rd916, %rd914; | |
st.u64 [%SP+968], %rd916; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp1008; | |
$L__tmp1008: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd917, [%SP+968]; | |
shl.b64 %rd918, %rd915, 3; | |
add.s64 %rd919, %rd917, %rd918; | |
st.u64 [%rd913+40], %rd919; | |
$L__tmp1009: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd920, [%SP+1000]; | |
setp.ne.s64 %p105, %rd920, 0; | |
not.pred %p106, %p105; | |
not.pred %p107, %p106; | |
@%p107 bra $L__BB6_72; | |
bra.uni $L__BB6_71; | |
$L__BB6_71: | |
mov.u32 %r74, 0; | |
mov.b32 %r75, %r74; | |
bra.uni $L__BB6_72; | |
$L__BB6_72: | |
ld.u64 %rd921, [%SP+1008]; | |
ld.u64 %rd922, [%SP+1016]; | |
ld.u64 %rd923, [%SP+1024]; | |
ld.u64 %rd924, [%SP+1032]; | |
ld.u64 %rd925, [%SP+1040]; | |
ld.u64 %rd926, [%SP+1048]; | |
$L__tmp1010: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 23 | |
st.u64 [%SP+2392], %rd926; | |
st.u64 [%SP+2384], %rd925; | |
st.u64 [%SP+2376], %rd924; | |
st.u64 [%SP+2368], %rd923; | |
st.u64 [%SP+2360], %rd922; | |
st.u64 [%SP+2352], %rd921; | |
ld.u64 %rd927, [%SP+2360]; | |
st.u64 [%SP+1800], %rd927; | |
add.u64 %rd928, %SP, 1800; | |
mov.b64 %rd929, %rd928; | |
st.u64 [%SP+1056], %rd929; | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 23 | |
bra.uni $L__tmp1011; | |
$L__tmp1011: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
ld.u64 %rd930, [%SP+1056]; | |
setp.ne.s64 %p108, %rd930, 0; | |
not.pred %p109, %p108; | |
not.pred %p110, %p109; | |
@%p110 bra $L__BB6_74; | |
bra.uni $L__BB6_73; | |
$L__BB6_73: | |
mov.u32 %r76, 0; | |
mov.b32 %r77, %r76; | |
bra.uni $L__BB6_74; | |
$L__BB6_74:
// Computes the first product of source line 72 and prepares the accessor call
// for the next operand.
// %rd933 = element address held in the one-pointer temporary whose address is
// stored at [%SP+1056].
ld.u64 %rd931, [%SP+1056];
ld.u64 %rd932, [%rd931];
mov.b64 %rd933, %rd932;
$L__tmp1012:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 23
// %fd20: operand at column 23 of line 72 (`b[i].y`).
// %fd7 was loaded earlier under .loc 72:14 (`a[i].x`).
// %fd8 = %fd7 * %fd20 is kept in a register; it is not consumed in this span.
ld.f64 %fd20, [%rd933];
mul.f64 %fd8, %fd7, %fd20;
// Arguments for the next inlined accessor: descriptor address %SP+1520 saved at
// [%SP+1096], index copied from %rd1 into %rd18.
add.u64 %rd934, %SP, 1520;
mov.b64 %rd935, %rd934;
st.u64 [%SP+1096], %rd935;
mov.b64 %rd18, %rd1;
$L__tmp1013: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 32 | |
bra.uni $L__tmp1014; | |
$L__tmp1014: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd936, [%SP+1096]; | |
mov.b64 %rd937, %rd936; | |
st.u64 [%SP+1088], %rd937; | |
mov.b64 %rd938, %rd18; | |
$L__tmp1015: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp1016; | |
$L__tmp1016: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd939, [%SP+1088]; | |
setp.ne.s64 %p111, %rd939, 0; | |
not.pred %p112, %p111; | |
not.pred %p113, %p112; | |
@%p113 bra $L__BB6_76; | |
bra.uni $L__BB6_75; | |
$L__BB6_75: | |
mov.u32 %r78, 0; | |
mov.b32 %r79, %r78; | |
bra.uni $L__BB6_76; | |
$L__tmp1017: | |
$L__BB6_76: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd940, [%SP+1096]; | |
ld.u64 %rd941, [%rd940+24]; | |
ld.u64 %rd942, [%SP+1096]; | |
ld.u64 %rd943, [%rd942+32]; | |
ld.u64 %rd944, [%SP+1096]; | |
ld.u64 %rd945, [%rd944+40]; | |
ld.u64 %rd946, [%SP+1096]; | |
ld.u64 %rd947, [%rd946+48]; | |
ld.u64 %rd948, [%SP+1096]; | |
ld.u64 %rd949, [%rd948+56]; | |
ld.u64 %rd950, [%SP+1096]; | |
ld.u64 %rd951, [%rd950+64]; | |
add.u64 %rd952, %SP, 1104; | |
mov.b64 %rd953, %rd952; | |
st.u64 [%SP+1072], %rd953; | |
mov.b64 %rd954, %rd18; | |
$L__tmp1018: | |
.loc 11 0 6996 | |
mov.b64 %rd955, %rd941; | |
$L__tmp1019: | |
mov.b64 %rd956, %rd943; | |
$L__tmp1020: | |
mov.b64 %rd957, %rd945; | |
$L__tmp1021: | |
mov.b64 %rd958, %rd947; | |
$L__tmp1022: | |
mov.b64 %rd959, %rd949; | |
$L__tmp1023: | |
mov.b64 %rd960, %rd951; | |
st.u64 [%SP+1080], %rd960; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp1024; | |
$L__tmp1024: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd961, [%SP+1072]; | |
$L__tmp1025: | |
.loc 11 0 5739 | |
mov.b64 %rd962, %rd954; | |
$L__tmp1026: | |
mov.b64 %rd963, %rd955; | |
$L__tmp1027: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp1028; | |
$L__tmp1028: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd964, %rd962, 3; | |
add.s64 %rd965, %rd963, %rd964; | |
st.u64 [%rd961], %rd965; | |
$L__tmp1029: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd966, [%SP+1072]; | |
mov.b64 %rd967, %rd954; | |
$L__tmp1030: | |
.loc 11 0 5752 | |
mov.b64 %rd968, %rd956; | |
$L__tmp1031: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp1032; | |
$L__tmp1032: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd969, %rd967, 3; | |
add.s64 %rd970, %rd968, %rd969; | |
st.u64 [%rd966+8], %rd970; | |
$L__tmp1033: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd971, [%SP+1072]; | |
mov.b64 %rd972, %rd954; | |
$L__tmp1034: | |
.loc 11 0 5765 | |
mov.b64 %rd973, %rd957; | |
$L__tmp1035: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp1036; | |
$L__tmp1036: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd974, %rd972, 3; | |
add.s64 %rd975, %rd973, %rd974; | |
st.u64 [%rd971+16], %rd975; | |
$L__tmp1037: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd976, [%SP+1072]; | |
mov.b64 %rd977, %rd954; | |
$L__tmp1038: | |
.loc 11 0 5778 | |
mov.b64 %rd978, %rd958; | |
$L__tmp1039: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp1040; | |
$L__tmp1040: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd979, %rd977, 1; | |
add.s64 %rd980, %rd978, %rd979; | |
st.u64 [%rd976+24], %rd980; | |
$L__tmp1041: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd981, [%SP+1072]; | |
mov.b64 %rd982, %rd954; | |
$L__tmp1042: | |
.loc 11 0 5801 | |
mov.b64 %rd983, %rd959; | |
$L__tmp1043: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp1044; | |
$L__tmp1044: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd984, %rd982, 2; | |
add.s64 %rd985, %rd983, %rd984; | |
st.u64 [%rd981+32], %rd985; | |
$L__tmp1045: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd986, [%SP+1072]; | |
ld.u64 %rd987, [%SP+1080]; | |
mov.b64 %rd988, %rd954; | |
$L__tmp1046: | |
.loc 11 0 5822 | |
mov.b64 %rd989, %rd987; | |
st.u64 [%SP+1064], %rd989; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp1047; | |
$L__tmp1047: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd990, [%SP+1064]; | |
shl.b64 %rd991, %rd988, 3; | |
add.s64 %rd992, %rd990, %rd991; | |
st.u64 [%rd986+40], %rd992; | |
$L__tmp1048: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd993, [%SP+1096]; | |
setp.ne.s64 %p114, %rd993, 0; | |
not.pred %p115, %p114; | |
not.pred %p116, %p115; | |
@%p116 bra $L__BB6_78; | |
bra.uni $L__BB6_77; | |
$L__BB6_77: | |
mov.u32 %r80, 0; | |
mov.b32 %r81, %r80; | |
bra.uni $L__BB6_78; | |
$L__BB6_78: | |
ld.u64 %rd994, [%SP+1104]; | |
ld.u64 %rd995, [%SP+1112]; | |
ld.u64 %rd996, [%SP+1120]; | |
ld.u64 %rd997, [%SP+1128]; | |
ld.u64 %rd998, [%SP+1136]; | |
ld.u64 %rd999, [%SP+1144]; | |
$L__tmp1049: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 32 | |
st.u64 [%SP+2440], %rd999; | |
st.u64 [%SP+2432], %rd998; | |
st.u64 [%SP+2424], %rd997; | |
st.u64 [%SP+2416], %rd996; | |
st.u64 [%SP+2408], %rd995; | |
st.u64 [%SP+2400], %rd994; | |
ld.u64 %rd1000, [%SP+2408]; | |
st.u64 [%SP+1808], %rd1000; | |
add.u64 %rd1001, %SP, 1808; | |
mov.b64 %rd1002, %rd1001; | |
st.u64 [%SP+1152], %rd1002; | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 32 | |
bra.uni $L__tmp1050; | |
$L__tmp1050: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
ld.u64 %rd1003, [%SP+1152]; | |
setp.ne.s64 %p117, %rd1003, 0; | |
not.pred %p118, %p117; | |
not.pred %p119, %p118; | |
@%p119 bra $L__BB6_80; | |
bra.uni $L__BB6_79; | |
$L__BB6_79: | |
mov.u32 %r82, 0; | |
mov.b32 %r83, %r82; | |
bra.uni $L__BB6_80; | |
$L__BB6_80: | |
ld.u64 %rd1004, [%SP+1152]; | |
ld.u64 %rd1005, [%rd1004]; | |
mov.b64 %rd1006, %rd1005; | |
$L__tmp1051: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 32 | |
ld.f64 %fd9, [%rd1006]; | |
add.u64 %rd1007, %SP, 1600; | |
mov.b64 %rd1008, %rd1007; | |
st.u64 [%SP+1192], %rd1008; | |
mov.b64 %rd19, %rd1; | |
$L__tmp1052: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 41 | |
bra.uni $L__tmp1053; | |
$L__tmp1053: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
ld.u64 %rd1009, [%SP+1192]; | |
mov.b64 %rd1010, %rd1009; | |
st.u64 [%SP+1184], %rd1010; | |
mov.b64 %rd1011, %rd19; | |
$L__tmp1054: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6977 | |
bra.uni $L__tmp1055; | |
$L__tmp1055: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd1012, [%SP+1184]; | |
setp.ne.s64 %p120, %rd1012, 0; | |
not.pred %p121, %p120; | |
not.pred %p122, %p121; | |
@%p122 bra $L__BB6_82; | |
bra.uni $L__BB6_81; | |
$L__BB6_81: | |
mov.u32 %r84, 0; | |
mov.b32 %r85, %r84; | |
bra.uni $L__BB6_82; | |
$L__tmp1056: | |
$L__BB6_82: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd1013, [%SP+1192]; | |
ld.u64 %rd1014, [%rd1013+24]; | |
ld.u64 %rd1015, [%SP+1192]; | |
ld.u64 %rd1016, [%rd1015+32]; | |
ld.u64 %rd1017, [%SP+1192]; | |
ld.u64 %rd1018, [%rd1017+40]; | |
ld.u64 %rd1019, [%SP+1192]; | |
ld.u64 %rd1020, [%rd1019+48]; | |
ld.u64 %rd1021, [%SP+1192]; | |
ld.u64 %rd1022, [%rd1021+56]; | |
ld.u64 %rd1023, [%SP+1192]; | |
ld.u64 %rd1024, [%rd1023+64]; | |
add.u64 %rd1025, %SP, 1200; | |
mov.b64 %rd1026, %rd1025; | |
st.u64 [%SP+1168], %rd1026; | |
mov.b64 %rd1027, %rd19; | |
$L__tmp1057: | |
.loc 11 0 6996 | |
mov.b64 %rd1028, %rd1014; | |
$L__tmp1058: | |
mov.b64 %rd1029, %rd1016; | |
$L__tmp1059: | |
mov.b64 %rd1030, %rd1018; | |
$L__tmp1060: | |
mov.b64 %rd1031, %rd1020; | |
$L__tmp1061: | |
mov.b64 %rd1032, %rd1022; | |
$L__tmp1062: | |
mov.b64 %rd1033, %rd1024; | |
st.u64 [%SP+1176], %rd1033; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
bra.uni $L__tmp1063; | |
$L__tmp1063: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd1034, [%SP+1168]; | |
$L__tmp1064: | |
.loc 11 0 5739 | |
mov.b64 %rd1035, %rd1027; | |
$L__tmp1065: | |
mov.b64 %rd1036, %rd1028; | |
$L__tmp1066: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp1067; | |
$L__tmp1067: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd1037, %rd1035, 3; | |
add.s64 %rd1038, %rd1036, %rd1037; | |
st.u64 [%rd1034], %rd1038; | |
$L__tmp1068: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd1039, [%SP+1168]; | |
mov.b64 %rd1040, %rd1027; | |
$L__tmp1069: | |
.loc 11 0 5752 | |
mov.b64 %rd1041, %rd1029; | |
$L__tmp1070: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp1071; | |
$L__tmp1071: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd1042, %rd1040, 3; | |
add.s64 %rd1043, %rd1041, %rd1042; | |
st.u64 [%rd1039+8], %rd1043; | |
$L__tmp1072: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd1044, [%SP+1168]; | |
mov.b64 %rd1045, %rd1027; | |
$L__tmp1073: | |
.loc 11 0 5765 | |
mov.b64 %rd1046, %rd1030; | |
$L__tmp1074: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp1075; | |
$L__tmp1075: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd1047, %rd1045, 3; | |
add.s64 %rd1048, %rd1046, %rd1047; | |
st.u64 [%rd1044+16], %rd1048; | |
$L__tmp1076: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd1049, [%SP+1168]; | |
mov.b64 %rd1050, %rd1027; | |
$L__tmp1077: | |
.loc 11 0 5778 | |
mov.b64 %rd1051, %rd1031; | |
$L__tmp1078: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp1079; | |
$L__tmp1079: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd1052, %rd1050, 1; | |
add.s64 %rd1053, %rd1051, %rd1052; | |
st.u64 [%rd1049+24], %rd1053; | |
$L__tmp1080: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd1054, [%SP+1168]; | |
mov.b64 %rd1055, %rd1027; | |
$L__tmp1081: | |
.loc 11 0 5801 | |
mov.b64 %rd1056, %rd1032; | |
$L__tmp1082: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp1083; | |
$L__tmp1083: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd1057, %rd1055, 2; | |
add.s64 %rd1058, %rd1056, %rd1057; | |
st.u64 [%rd1054+32], %rd1058; | |
$L__tmp1084: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd1059, [%SP+1168]; | |
ld.u64 %rd1060, [%SP+1176]; | |
mov.b64 %rd1061, %rd1027; | |
$L__tmp1085: | |
.loc 11 0 5822 | |
mov.b64 %rd1062, %rd1060; | |
st.u64 [%SP+1160], %rd1062; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp1086; | |
$L__tmp1086: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd1063, [%SP+1160]; | |
shl.b64 %rd1064, %rd1061, 3; | |
add.s64 %rd1065, %rd1063, %rd1064; | |
st.u64 [%rd1059+40], %rd1065; | |
$L__tmp1087: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6996 | |
ld.u64 %rd1066, [%SP+1192]; | |
setp.ne.s64 %p123, %rd1066, 0; | |
not.pred %p124, %p123; | |
not.pred %p125, %p124; | |
@%p125 bra $L__BB6_84; | |
bra.uni $L__BB6_83; | |
$L__BB6_83: | |
mov.u32 %r86, 0; | |
mov.b32 %r87, %r86; | |
bra.uni $L__BB6_84; | |
$L__BB6_84: | |
ld.u64 %rd1067, [%SP+1200]; | |
ld.u64 %rd1068, [%SP+1208]; | |
ld.u64 %rd1069, [%SP+1216]; | |
ld.u64 %rd1070, [%SP+1224]; | |
ld.u64 %rd1071, [%SP+1232]; | |
ld.u64 %rd1072, [%SP+1240]; | |
$L__tmp1088: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 41 | |
st.u64 [%SP+2488], %rd1072; | |
st.u64 [%SP+2480], %rd1071; | |
st.u64 [%SP+2472], %rd1070; | |
st.u64 [%SP+2464], %rd1069; | |
st.u64 [%SP+2456], %rd1068; | |
st.u64 [%SP+2448], %rd1067; | |
ld.u64 %rd1073, [%SP+2448]; | |
st.u64 [%SP+1816], %rd1073; | |
add.u64 %rd1074, %SP, 1816; | |
mov.b64 %rd1075, %rd1074; | |
st.u64 [%SP+1248], %rd1075; | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 41 | |
bra.uni $L__tmp1089; | |
$L__tmp1089: | |
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; } | |
.loc 12 31 119 | |
ld.u64 %rd1076, [%SP+1248]; | |
setp.ne.s64 %p126, %rd1076, 0; | |
not.pred %p127, %p126; | |
not.pred %p128, %p127; | |
@%p128 bra $L__BB6_86; | |
bra.uni $L__BB6_85; | |
$L__BB6_85: | |
mov.u32 %r88, 0; | |
mov.b32 %r89, %r88; | |
bra.uni $L__BB6_86; | |
$L__BB6_86: | |
ld.u64 %rd1077, [%SP+1248]; | |
ld.u64 %rd1078, [%rd1077]; | |
mov.b64 %rd1079, %rd1078; | |
$L__tmp1090: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 41 | |
ld.f64 %fd21, [%rd1079]; | |
mul.f64 %fd22, %fd9, %fd21; | |
sub.f64 %fd23, %fd8, %fd22; | |
st.f64 [%SP+1784], %fd23; | |
add.u64 %rd1080, %SP, 1784; | |
mov.b64 %rd20, %rd1080; | |
add.u64 %rd1081, %SP, 1440; | |
mov.b64 %rd1082, %rd1081; | |
st.u64 [%SP+1288], %rd1082; | |
mov.b64 %rd21, %rd1; | |
$L__tmp1091: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 5 | |
bra.uni $L__tmp1092; | |
$L__tmp1092: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6763 | |
ld.u64 %rd1083, [%SP+1288]; | |
mov.b64 %rd1084, %rd1083; | |
st.u64 [%SP+1280], %rd1084; | |
mov.b64 %rd1085, %rd21; | |
$L__tmp1093: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6763 | |
bra.uni $L__tmp1094; | |
$L__tmp1094: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 9055 | |
ld.u64 %rd1086, [%SP+1280]; | |
setp.ne.s64 %p129, %rd1086, 0; | |
not.pred %p130, %p129; | |
not.pred %p131, %p130; | |
@%p131 bra $L__BB6_88; | |
bra.uni $L__BB6_87; | |
$L__BB6_87: | |
mov.u32 %r90, 0; | |
mov.b32 %r91, %r90; | |
bra.uni $L__BB6_88; | |
$L__tmp1095: | |
$L__BB6_88: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6782 | |
ld.u64 %rd1087, [%SP+1288]; | |
ld.u64 %rd1088, [%rd1087+24]; | |
ld.u64 %rd1089, [%SP+1288]; | |
ld.u64 %rd1090, [%rd1089+32]; | |
ld.u64 %rd1091, [%SP+1288]; | |
ld.u64 %rd1092, [%rd1091+40]; | |
ld.u64 %rd1093, [%SP+1288]; | |
ld.u64 %rd1094, [%rd1093+48]; | |
ld.u64 %rd1095, [%SP+1288]; | |
ld.u64 %rd1096, [%rd1095+56]; | |
ld.u64 %rd1097, [%SP+1288]; | |
ld.u64 %rd1098, [%rd1097+64]; | |
add.u64 %rd1099, %SP, 1296; | |
mov.b64 %rd1100, %rd1099; | |
st.u64 [%SP+1264], %rd1100; | |
mov.b64 %rd1101, %rd21; | |
$L__tmp1096: | |
.loc 11 0 6782 | |
mov.b64 %rd1102, %rd1088; | |
$L__tmp1097: | |
mov.b64 %rd1103, %rd1090; | |
$L__tmp1098: | |
mov.b64 %rd1104, %rd1092; | |
$L__tmp1099: | |
mov.b64 %rd1105, %rd1094; | |
$L__tmp1100: | |
mov.b64 %rd1106, %rd1096; | |
$L__tmp1101: | |
mov.b64 %rd1107, %rd1098; | |
st.u64 [%SP+1272], %rd1107; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6782 | |
bra.uni $L__tmp1102; | |
$L__tmp1102: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
ld.u64 %rd1108, [%SP+1264]; | |
$L__tmp1103: | |
.loc 11 0 5739 | |
mov.b64 %rd1109, %rd1101; | |
$L__tmp1104: | |
mov.b64 %rd1110, %rd1102; | |
$L__tmp1105: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5739 | |
bra.uni $L__tmp1106; | |
$L__tmp1106: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd1111, %rd1109, 3; | |
add.s64 %rd1112, %rd1110, %rd1111; | |
st.u64 [%rd1108], %rd1112; | |
$L__tmp1107: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
ld.u64 %rd1113, [%SP+1264]; | |
mov.b64 %rd1114, %rd1101; | |
$L__tmp1108: | |
.loc 11 0 5752 | |
mov.b64 %rd1115, %rd1103; | |
$L__tmp1109: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5752 | |
bra.uni $L__tmp1110; | |
$L__tmp1110: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd1116, %rd1114, 3; | |
add.s64 %rd1117, %rd1115, %rd1116; | |
st.u64 [%rd1113+8], %rd1117; | |
$L__tmp1111: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
ld.u64 %rd1118, [%SP+1264]; | |
mov.b64 %rd1119, %rd1101; | |
$L__tmp1112: | |
.loc 11 0 5765 | |
mov.b64 %rd1120, %rd1104; | |
$L__tmp1113: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5765 | |
bra.uni $L__tmp1114; | |
$L__tmp1114: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd1121, %rd1119, 3; | |
add.s64 %rd1122, %rd1120, %rd1121; | |
st.u64 [%rd1118+16], %rd1122; | |
$L__tmp1115: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
ld.u64 %rd1123, [%SP+1264]; | |
mov.b64 %rd1124, %rd1101; | |
$L__tmp1116: | |
.loc 11 0 5778 | |
mov.b64 %rd1125, %rd1105; | |
$L__tmp1117: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5778 | |
bra.uni $L__tmp1118; | |
$L__tmp1118: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd1126, %rd1124, 1; | |
add.s64 %rd1127, %rd1125, %rd1126; | |
st.u64 [%rd1123+24], %rd1127; | |
$L__tmp1119: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
ld.u64 %rd1128, [%SP+1264]; | |
mov.b64 %rd1129, %rd1101; | |
$L__tmp1120: | |
.loc 11 0 5801 | |
mov.b64 %rd1130, %rd1106; | |
$L__tmp1121: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5801 | |
bra.uni $L__tmp1122; | |
$L__tmp1122: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
shl.b64 %rd1131, %rd1129, 2; | |
add.s64 %rd1132, %rd1130, %rd1131; | |
st.u64 [%rd1128+32], %rd1132; | |
$L__tmp1123: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
ld.u64 %rd1133, [%SP+1264]; | |
ld.u64 %rd1134, [%SP+1272]; | |
mov.b64 %rd1135, %rd1101; | |
$L__tmp1124: | |
.loc 11 0 5822 | |
mov.b64 %rd1136, %rd1134; | |
st.u64 [%SP+1256], %rd1136; | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 5822 | |
bra.uni $L__tmp1125; | |
$L__tmp1125: | |
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {} | |
.loc 12 29 120 | |
ld.u64 %rd1137, [%SP+1256]; | |
shl.b64 %rd1138, %rd1135, 3; | |
add.s64 %rd1139, %rd1137, %rd1138; | |
st.u64 [%rd1133+40], %rd1139; | |
$L__tmp1126: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 6782 | |
ld.u64 %rd1140, [%SP+1288]; | |
setp.ne.s64 %p132, %rd1140, 0; | |
not.pred %p133, %p132; | |
not.pred %p134, %p133; | |
@%p134 bra $L__BB6_90; | |
bra.uni $L__BB6_89; | |
$L__BB6_89: | |
mov.u32 %r92, 0; | |
mov.b32 %r93, %r92; | |
bra.uni $L__BB6_90; | |
$L__BB6_90: | |
ld.u64 %rd1141, [%SP+1296]; | |
ld.u64 %rd1142, [%SP+1304]; | |
ld.u64 %rd1143, [%SP+1312]; | |
ld.u64 %rd1144, [%SP+1320]; | |
ld.u64 %rd1145, [%SP+1328]; | |
ld.u64 %rd1146, [%SP+1336]; | |
$L__tmp1127: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 5 | |
st.u64 [%SP+2536], %rd1146; | |
st.u64 [%SP+2528], %rd1145; | |
st.u64 [%SP+2520], %rd1144; | |
st.u64 [%SP+2512], %rd1143; | |
st.u64 [%SP+2504], %rd1142; | |
st.u64 [%SP+2496], %rd1141; | |
ld.u64 %rd1147, [%SP+2512]; | |
st.u64 [%SP+1776], %rd1147; | |
add.u64 %rd1148, %SP, 1776; | |
mov.b64 %rd1149, %rd1148; | |
st.u64 [%SP+1344], %rd1149; | |
mov.b64 %rd22, %rd20; | |
$L__tmp1128: | |
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x; | |
.loc 18 72 5 | |
bra.uni $L__tmp1129; | |
$L__tmp1129: | |
//soa_v7.h:35 SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return val_ = v; } | |
.loc 12 35 120 | |
ld.u64 %rd1150, [%SP+1344]; | |
setp.ne.s64 %p135, %rd1150, 0; | |
not.pred %p136, %p135; | |
not.pred %p137, %p136; | |
@%p137 bra $L__BB6_92; | |
bra.uni $L__BB6_91; | |
$L__BB6_91: | |
mov.u32 %r94, 0; | |
mov.b32 %r95, %r94; | |
bra.uni $L__BB6_92; | |
$L__BB6_92: | |
ld.f64 %fd24, [%rd22]; | |
ld.u64 %rd1151, [%SP+1344]; | |
ld.u64 %rd1152, [%rd1151]; | |
mov.b64 %rd1153, %rd1152; | |
st.f64 [%rd1153], %fd24; | |
mov.b64 %rd1154, %rd1153; | |
$L__tmp1130: | |
//test_v7_cuda.cu:73 } | |
.loc 18 73 3 | |
bra.uni $L__BB6_93; | |
$L__BB6_93: | |
ret; | |
$L__tmp1131: | |
$L__func_end6: | |
} | |
.entry _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m( | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0[80], | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1[80], | |
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2[80], | |
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3 | |
) | |
{ | |
.local .align 8 .b8 __local_depot7[360]; | |
.reg .b64 %SP; | |
.reg .b64 %SPL; | |
.reg .pred %p<48>; | |
.reg .b32 %r<36>; | |
.reg .f64 %fd<22>; | |
.reg .b64 %rd<153>; | |
//test_v7_cuda.cu:76 __global__ void handcraftedCrossProductSoA(testSoA::SoA r, const testSoA::SoA a, const testSoA::SoA b, size_t nElements) { | |
.loc 18 76 0 | |
$L__func_begin7: | |
//test_v7_cuda.cu:76 __global__ void handcraftedCrossProductSoA(testSoA::SoA r, const testSoA::SoA a, const testSoA::SoA b, size_t nElements) { | |
.loc 18 76 0 | |
mov.u64 %SPL, __local_depot7; | |
cvta.local.u64 %SP, %SPL; | |
ld.param.u64 %rd23, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+8]; | |
ld.param.u64 %rd24, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+16]; | |
ld.param.u64 %rd25, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+24]; | |
ld.param.u64 %rd26, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+32]; | |
ld.param.u64 %rd27, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+40]; | |
ld.param.u64 %rd28, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+48]; | |
ld.param.u64 %rd29, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+56]; | |
ld.param.u64 %rd30, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+64]; | |
ld.param.u64 %rd31, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+72]; | |
ld.param.u64 %rd13, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+8]; | |
ld.param.u64 %rd14, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+16]; | |
ld.param.u64 %rd15, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+24]; | |
ld.param.u64 %rd16, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+32]; | |
ld.param.u64 %rd17, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+40]; | |
ld.param.u64 %rd18, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+48]; | |
ld.param.u64 %rd19, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+56]; | |
ld.param.u64 %rd20, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+64]; | |
ld.param.u64 %rd21, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+72]; | |
ld.param.u64 %rd3, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+8]; | |
ld.param.u64 %rd4, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+16]; | |
ld.param.u64 %rd5, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+24]; | |
ld.param.u64 %rd6, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+32]; | |
ld.param.u64 %rd7, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+40]; | |
ld.param.u64 %rd8, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+48]; | |
ld.param.u64 %rd9, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+56]; | |
ld.param.u64 %rd10, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+64]; | |
ld.param.u64 %rd11, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+72]; | |
ld.param.u64 %rd32, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3]; | |
ld.param.u64 %rd22, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2]; | |
ld.param.u64 %rd12, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1]; | |
ld.param.u64 %rd2, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0]; | |
st.u64 [%SP+192], %rd11; | |
st.u64 [%SP+184], %rd10; | |
st.u64 [%SP+176], %rd9; | |
st.u64 [%SP+168], %rd8; | |
st.u64 [%SP+160], %rd7; | |
st.u64 [%SP+152], %rd6; | |
st.u64 [%SP+144], %rd5; | |
st.u64 [%SP+136], %rd4; | |
st.u64 [%SP+128], %rd3; | |
st.u64 [%SP+120], %rd2; | |
st.u64 [%SP+272], %rd21; | |
st.u64 [%SP+264], %rd20; | |
st.u64 [%SP+256], %rd19; | |
st.u64 [%SP+248], %rd18; | |
st.u64 [%SP+240], %rd17; | |
st.u64 [%SP+232], %rd16; | |
st.u64 [%SP+224], %rd15; | |
st.u64 [%SP+216], %rd14; | |
st.u64 [%SP+208], %rd13; | |
st.u64 [%SP+200], %rd12; | |
st.u64 [%SP+352], %rd31; | |
st.u64 [%SP+344], %rd30; | |
st.u64 [%SP+336], %rd29; | |
st.u64 [%SP+328], %rd28; | |
st.u64 [%SP+320], %rd27; | |
st.u64 [%SP+312], %rd26; | |
st.u64 [%SP+304], %rd25; | |
st.u64 [%SP+296], %rd24; | |
st.u64 [%SP+288], %rd23; | |
st.u64 [%SP+280], %rd22; | |
$L__tmp1132: | |
//test_v7_cuda.cu:77 size_t i = blockIdx.x * blockDim.x + threadIdx.x; | |
.loc 18 77 14 | |
mov.u32 %r1, %ctaid.x; | |
mov.u32 %r2, %ntid.x; | |
mul.lo.s32 %r3, %r1, %r2; | |
mov.u32 %r4, %tid.x; | |
add.s32 %r5, %r3, %r4; | |
cvt.u64.u32 %rd1, %r5; | |
$L__tmp1133: | |
//test_v7_cuda.cu:78 if (i >= nElements) return; | |
.loc 18 78 5 | |
setp.ge.u64 %p1, %rd1, %rd32; | |
not.pred %p2, %p1; | |
@%p2 bra $L__BB7_2; | |
bra.uni $L__BB7_1; | |
$L__BB7_1: | |
$L__tmp1134: | |
//test_v7_cuda.cu:78 if (i >= nElements) return; | |
.loc 18 78 25 | |
bra.uni $L__BB7_33; | |
$L__tmp1135: | |
$L__BB7_2: | |
.loc 18 0 25 | |
add.u64 %rd33, %SP, 200; | |
mov.b64 %rd34, %rd33; | |
st.u64 [%SP+112], %rd34; | |
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i]; | |
.loc 18 79 16 | |
bra.uni $L__tmp1136; | |
$L__tmp1136: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 8155 | |
ld.u64 %rd35, [%SP+112]; | |
setp.ne.s64 %p3, %rd35, 0; | |
not.pred %p4, %p3; | |
not.pred %p5, %p4; | |
@%p5 bra $L__BB7_4; | |
bra.uni $L__BB7_3; | |
$L__BB7_3: | |
mov.u32 %r6, 0; | |
mov.b32 %r7, %r6; | |
bra.uni $L__BB7_4; | |
$L__BB7_4: | |
ld.u64 %rd36, [%SP+112]; | |
ld.u64 %rd37, [%rd36+32]; | |
mov.b64 %rd38, %rd37; | |
$L__tmp1137: | |
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i]; | |
.loc 18 79 16 | |
shl.b64 %rd39, %rd1, 3; | |
add.s64 %rd40, %rd38, %rd39; | |
ld.f64 %fd1, [%rd40]; | |
add.u64 %rd41, %SP, 280; | |
mov.b64 %rd42, %rd41; | |
st.u64 [%SP+0], %rd42; | |
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i]; | |
.loc 18 79 27 | |
bra.uni $L__tmp1138; | |
$L__tmp1138: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 8155 | |
ld.u64 %rd43, [%SP+0]; | |
setp.ne.s64 %p6, %rd43, 0; | |
not.pred %p7, %p6; | |
not.pred %p8, %p7; | |
@%p8 bra $L__BB7_6; | |
bra.uni $L__BB7_5; | |
$L__BB7_5: | |
mov.u32 %r8, 0; | |
mov.b32 %r9, %r8; | |
bra.uni $L__BB7_6; | |
$L__BB7_6: | |
ld.u64 %rd44, [%SP+0]; | |
ld.u64 %rd45, [%rd44+32]; | |
mov.b64 %rd46, %rd45; | |
$L__tmp1139: | |
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i]; | |
.loc 18 79 27 | |
shl.b64 %rd47, %rd1, 3; | |
add.s64 %rd48, %rd46, %rd47; | |
ld.f64 %fd13, [%rd48]; | |
mul.f64 %fd2, %fd1, %fd13; | |
add.u64 %rd49, %SP, 200; | |
mov.b64 %rd50, %rd49; | |
st.u64 [%SP+8], %rd50; | |
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i]; | |
.loc 18 79 38 | |
bra.uni $L__tmp1140; | |
$L__tmp1140: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 8282 | |
ld.u64 %rd51, [%SP+8]; | |
setp.ne.s64 %p9, %rd51, 0; | |
not.pred %p10, %p9; | |
not.pred %p11, %p10; | |
@%p11 bra $L__BB7_8; | |
bra.uni $L__BB7_7; | |
$L__BB7_7: | |
mov.u32 %r10, 0; | |
mov.b32 %r11, %r10; | |
bra.uni $L__BB7_8; | |
$L__BB7_8: | |
ld.u64 %rd52, [%SP+8]; | |
ld.u64 %rd53, [%rd52+40]; | |
mov.b64 %rd54, %rd53; | |
$L__tmp1141: | |
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i]; | |
.loc 18 79 38 | |
shl.b64 %rd55, %rd1, 3; | |
add.s64 %rd56, %rd54, %rd55; | |
ld.f64 %fd3, [%rd56]; | |
add.u64 %rd57, %SP, 280; | |
mov.b64 %rd58, %rd57; | |
st.u64 [%SP+16], %rd58; | |
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i]; | |
.loc 18 79 49 | |
bra.uni $L__tmp1142; | |
$L__tmp1142: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 8155 | |
ld.u64 %rd59, [%SP+16]; | |
setp.ne.s64 %p12, %rd59, 0; | |
not.pred %p13, %p12; | |
not.pred %p14, %p13; | |
@%p14 bra $L__BB7_10; | |
bra.uni $L__BB7_9; | |
$L__BB7_9: | |
mov.u32 %r12, 0; | |
mov.b32 %r13, %r12; | |
bra.uni $L__BB7_10; | |
$L__BB7_10: | |
ld.u64 %rd60, [%SP+16]; | |
ld.u64 %rd61, [%rd60+32]; | |
mov.b64 %rd62, %rd61; | |
$L__tmp1143: | |
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i]; | |
.loc 18 79 49 | |
shl.b64 %rd63, %rd1, 3; | |
add.s64 %rd64, %rd62, %rd63; | |
ld.f64 %fd14, [%rd64]; | |
mul.f64 %fd15, %fd3, %fd14; | |
sub.f64 %fd4, %fd2, %fd15; | |
add.u64 %rd65, %SP, 120; | |
mov.b64 %rd66, %rd65; | |
st.u64 [%SP+24], %rd66; | |
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i]; | |
.loc 18 79 5 | |
bra.uni $L__tmp1144; | |
$L__tmp1144: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 7158 | |
ld.u64 %rd67, [%SP+24]; | |
setp.ne.s64 %p15, %rd67, 0; | |
not.pred %p16, %p15; | |
not.pred %p17, %p16; | |
@%p17 bra $L__BB7_12; | |
bra.uni $L__BB7_11; | |
$L__BB7_11: | |
mov.u32 %r14, 0; | |
mov.b32 %r15, %r14; | |
bra.uni $L__BB7_12; | |
$L__BB7_12: | |
ld.u64 %rd68, [%SP+24]; | |
ld.u64 %rd69, [%rd68+24]; | |
mov.b64 %rd70, %rd69; | |
$L__tmp1145: | |
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i]; | |
.loc 18 79 5 | |
shl.b64 %rd71, %rd1, 3; | |
add.s64 %rd72, %rd70, %rd71; | |
st.f64 [%rd72], %fd4; | |
add.u64 %rd73, %SP, 200; | |
mov.b64 %rd74, %rd73; | |
st.u64 [%SP+32], %rd74; | |
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i]; | |
.loc 18 80 16 | |
bra.uni $L__tmp1146; | |
$L__tmp1146: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 8282 | |
ld.u64 %rd75, [%SP+32]; | |
setp.ne.s64 %p18, %rd75, 0; | |
not.pred %p19, %p18; | |
not.pred %p20, %p19; | |
@%p20 bra $L__BB7_14; | |
bra.uni $L__BB7_13; | |
$L__BB7_13: | |
mov.u32 %r16, 0; | |
mov.b32 %r17, %r16; | |
bra.uni $L__BB7_14; | |
$L__BB7_14: | |
ld.u64 %rd76, [%SP+32]; | |
ld.u64 %rd77, [%rd76+40]; | |
mov.b64 %rd78, %rd77; | |
$L__tmp1147: | |
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i]; | |
.loc 18 80 16 | |
shl.b64 %rd79, %rd1, 3; | |
add.s64 %rd80, %rd78, %rd79; | |
ld.f64 %fd5, [%rd80]; | |
add.u64 %rd81, %SP, 280; | |
mov.b64 %rd82, %rd81; | |
st.u64 [%SP+40], %rd82; | |
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i]; | |
.loc 18 80 27 | |
bra.uni $L__tmp1148; | |
$L__tmp1148: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 8028 | |
ld.u64 %rd83, [%SP+40]; | |
setp.ne.s64 %p21, %rd83, 0; | |
not.pred %p22, %p21; | |
not.pred %p23, %p22; | |
@%p23 bra $L__BB7_16; | |
bra.uni $L__BB7_15; | |
$L__BB7_15: | |
mov.u32 %r18, 0; | |
mov.b32 %r19, %r18; | |
bra.uni $L__BB7_16; | |
$L__BB7_16: | |
ld.u64 %rd84, [%SP+40]; | |
ld.u64 %rd85, [%rd84+24]; | |
mov.b64 %rd86, %rd85; | |
$L__tmp1149: | |
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i]; | |
.loc 18 80 27 | |
shl.b64 %rd87, %rd1, 3; | |
add.s64 %rd88, %rd86, %rd87; | |
ld.f64 %fd16, [%rd88]; | |
mul.f64 %fd6, %fd5, %fd16; | |
add.u64 %rd89, %SP, 200; | |
mov.b64 %rd90, %rd89; | |
st.u64 [%SP+48], %rd90; | |
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i]; | |
.loc 18 80 38 | |
bra.uni $L__tmp1150; | |
$L__tmp1150: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 8028 | |
ld.u64 %rd91, [%SP+48]; | |
setp.ne.s64 %p24, %rd91, 0; | |
not.pred %p25, %p24; | |
not.pred %p26, %p25; | |
@%p26 bra $L__BB7_18; | |
bra.uni $L__BB7_17; | |
$L__BB7_17: | |
mov.u32 %r20, 0; | |
mov.b32 %r21, %r20; | |
bra.uni $L__BB7_18; | |
$L__BB7_18: | |
ld.u64 %rd92, [%SP+48]; | |
ld.u64 %rd93, [%rd92+24]; | |
mov.b64 %rd94, %rd93; | |
$L__tmp1151: | |
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i]; | |
.loc 18 80 38 | |
shl.b64 %rd95, %rd1, 3; | |
add.s64 %rd96, %rd94, %rd95; | |
ld.f64 %fd7, [%rd96]; | |
add.u64 %rd97, %SP, 280; | |
mov.b64 %rd98, %rd97; | |
st.u64 [%SP+56], %rd98; | |
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i]; | |
.loc 18 80 49 | |
bra.uni $L__tmp1152; | |
$L__tmp1152: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 8282 | |
ld.u64 %rd99, [%SP+56]; | |
setp.ne.s64 %p27, %rd99, 0; | |
not.pred %p28, %p27; | |
not.pred %p29, %p28; | |
@%p29 bra $L__BB7_20; | |
bra.uni $L__BB7_19; | |
$L__BB7_19: | |
mov.u32 %r22, 0; | |
mov.b32 %r23, %r22; | |
bra.uni $L__BB7_20; | |
$L__BB7_20: | |
ld.u64 %rd100, [%SP+56]; | |
ld.u64 %rd101, [%rd100+40]; | |
mov.b64 %rd102, %rd101; | |
$L__tmp1153: | |
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i]; | |
.loc 18 80 49 | |
shl.b64 %rd103, %rd1, 3; | |
add.s64 %rd104, %rd102, %rd103; | |
ld.f64 %fd17, [%rd104]; | |
mul.f64 %fd18, %fd7, %fd17; | |
sub.f64 %fd8, %fd6, %fd18; | |
add.u64 %rd105, %SP, 120; | |
mov.b64 %rd106, %rd105; | |
st.u64 [%SP+64], %rd106; | |
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i]; | |
.loc 18 80 5 | |
bra.uni $L__tmp1154; | |
$L__tmp1154: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 7273 | |
ld.u64 %rd107, [%SP+64]; | |
setp.ne.s64 %p30, %rd107, 0; | |
not.pred %p31, %p30; | |
not.pred %p32, %p31; | |
@%p32 bra $L__BB7_22; | |
bra.uni $L__BB7_21; | |
$L__BB7_21: | |
mov.u32 %r24, 0; | |
mov.b32 %r25, %r24; | |
bra.uni $L__BB7_22; | |
$L__BB7_22: | |
ld.u64 %rd108, [%SP+64]; | |
ld.u64 %rd109, [%rd108+32]; | |
mov.b64 %rd110, %rd109; | |
$L__tmp1155: | |
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i]; | |
.loc 18 80 5 | |
shl.b64 %rd111, %rd1, 3; | |
add.s64 %rd112, %rd110, %rd111; | |
st.f64 [%rd112], %fd8; | |
add.u64 %rd113, %SP, 200; | |
mov.b64 %rd114, %rd113; | |
st.u64 [%SP+72], %rd114; | |
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i]; | |
.loc 18 81 16 | |
bra.uni $L__tmp1156; | |
$L__tmp1156: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 8028 | |
ld.u64 %rd115, [%SP+72]; | |
setp.ne.s64 %p33, %rd115, 0; | |
not.pred %p34, %p33; | |
not.pred %p35, %p34; | |
@%p35 bra $L__BB7_24; | |
bra.uni $L__BB7_23; | |
$L__BB7_23: | |
mov.u32 %r26, 0; | |
mov.b32 %r27, %r26; | |
bra.uni $L__BB7_24; | |
$L__BB7_24: | |
ld.u64 %rd116, [%SP+72]; | |
ld.u64 %rd117, [%rd116+24]; | |
mov.b64 %rd118, %rd117; | |
$L__tmp1157: | |
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i]; | |
.loc 18 81 16 | |
shl.b64 %rd119, %rd1, 3; | |
add.s64 %rd120, %rd118, %rd119; | |
ld.f64 %fd9, [%rd120]; | |
add.u64 %rd121, %SP, 280; | |
mov.b64 %rd122, %rd121; | |
st.u64 [%SP+80], %rd122; | |
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i]; | |
.loc 18 81 27 | |
bra.uni $L__tmp1158; | |
$L__tmp1158: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 8155 | |
ld.u64 %rd123, [%SP+80]; | |
setp.ne.s64 %p36, %rd123, 0; | |
not.pred %p37, %p36; | |
not.pred %p38, %p37; | |
@%p38 bra $L__BB7_26; | |
bra.uni $L__BB7_25; | |
$L__BB7_25: | |
mov.u32 %r28, 0; | |
mov.b32 %r29, %r28; | |
bra.uni $L__BB7_26; | |
$L__BB7_26: | |
ld.u64 %rd124, [%SP+80]; | |
ld.u64 %rd125, [%rd124+32]; | |
mov.b64 %rd126, %rd125; | |
$L__tmp1159: | |
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i]; | |
.loc 18 81 27 | |
shl.b64 %rd127, %rd1, 3; | |
add.s64 %rd128, %rd126, %rd127; | |
ld.f64 %fd19, [%rd128]; | |
mul.f64 %fd10, %fd9, %fd19; | |
add.u64 %rd129, %SP, 200; | |
mov.b64 %rd130, %rd129; | |
st.u64 [%SP+88], %rd130; | |
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i]; | |
.loc 18 81 38 | |
bra.uni $L__tmp1160; | |
$L__tmp1160: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 8155 | |
ld.u64 %rd131, [%SP+88]; | |
setp.ne.s64 %p39, %rd131, 0; | |
not.pred %p40, %p39; | |
not.pred %p41, %p40; | |
@%p41 bra $L__BB7_28; | |
bra.uni $L__BB7_27; | |
$L__BB7_27: | |
mov.u32 %r30, 0; | |
mov.b32 %r31, %r30; | |
bra.uni $L__BB7_28; | |
$L__BB7_28: | |
ld.u64 %rd132, [%SP+88]; | |
ld.u64 %rd133, [%rd132+32]; | |
mov.b64 %rd134, %rd133; | |
$L__tmp1161: | |
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i]; | |
.loc 18 81 38 | |
shl.b64 %rd135, %rd1, 3; | |
add.s64 %rd136, %rd134, %rd135; | |
ld.f64 %fd11, [%rd136]; | |
add.u64 %rd137, %SP, 280; | |
mov.b64 %rd138, %rd137; | |
st.u64 [%SP+96], %rd138; | |
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i]; | |
.loc 18 81 49 | |
bra.uni $L__tmp1162; | |
$L__tmp1162: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 8028 | |
ld.u64 %rd139, [%SP+96]; | |
setp.ne.s64 %p42, %rd139, 0; | |
not.pred %p43, %p42; | |
not.pred %p44, %p43; | |
@%p44 bra $L__BB7_30; | |
bra.uni $L__BB7_29; | |
$L__BB7_29: | |
mov.u32 %r32, 0; | |
mov.b32 %r33, %r32; | |
bra.uni $L__BB7_30; | |
$L__BB7_30: | |
ld.u64 %rd140, [%SP+96]; | |
ld.u64 %rd141, [%rd140+24]; | |
mov.b64 %rd142, %rd141; | |
$L__tmp1163: | |
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i]; | |
.loc 18 81 49 | |
shl.b64 %rd143, %rd1, 3; | |
add.s64 %rd144, %rd142, %rd143; | |
ld.f64 %fd20, [%rd144]; | |
mul.f64 %fd21, %fd11, %fd20; | |
sub.f64 %fd12, %fd10, %fd21; | |
add.u64 %rd145, %SP, 120; | |
mov.b64 %rd146, %rd145; | |
st.u64 [%SP+104], %rd146; | |
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i]; | |
.loc 18 81 5 | |
bra.uni $L__tmp1164; | |
$L__tmp1164: | |
//soa_v7_cuda.h:36 declare_SoA_template(SoA, | |
.loc 11 36 7388 | |
ld.u64 %rd147, [%SP+104]; | |
setp.ne.s64 %p45, %rd147, 0; | |
not.pred %p46, %p45; | |
not.pred %p47, %p46; | |
@%p47 bra $L__BB7_32; | |
bra.uni $L__BB7_31; | |
$L__BB7_31: | |
mov.u32 %r34, 0; | |
mov.b32 %r35, %r34; | |
bra.uni $L__BB7_32; | |
$L__BB7_32: | |
ld.u64 %rd148, [%SP+104]; | |
ld.u64 %rd149, [%rd148+40]; | |
mov.b64 %rd150, %rd149; | |
$L__tmp1165: | |
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i]; | |
.loc 18 81 5 | |
shl.b64 %rd151, %rd1, 3; | |
add.s64 %rd152, %rd150, %rd151; | |
st.f64 [%rd152], %fd12; | |
//test_v7_cuda.cu:82 } | |
.loc 18 82 3 | |
bra.uni $L__BB7_33; | |
$L__BB7_33: | |
ret; | |
$L__tmp1166: | |
$L__func_end7: | |
} |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.