Skip to content

Instantly share code, notes, and snippets.

@ericcano
Last active July 8, 2021 09:46
Show Gist options
  • Save ericcano/ad6586cb0619d6a42cc91606a6556855 to your computer and use it in GitHub Desktop.
Save ericcano/ad6586cb0619d6a42cc91606a6556855 to your computer and use it in GitHub Desktop.
// -----------------------------------------------------------------------------
// Kernel: indirectCrossProductSoA (anonymous namespace, test_v7_cuda.cu:57)
//   __global__ void indirectCrossProductSoA(testSoA::SoA r, const testSoA::SoA a,
//                                           const testSoA::SoA b, size_t nElements)
//
// Parameters (all passed by value):
//   param_0 : r          80-byte struct, 8-byte aligned
//   param_1 : a          80-byte struct, 8-byte aligned
//   param_2 : b          80-byte struct, 8-byte aligned
//   param_3 : nElements  u64
//
// Within each 80-byte struct this kernel only dereferences the six qwords at
// byte offsets 24, 32, 40, 48, 56 and 64; they are used as column base
// pointers and indexed with strides of 8, 8, 8, 2, 4 and 8 bytes respectively.
// The qwords at offsets 0, 8, 16 and 72 are copied to the local frame but are
// not read again inside this kernel.
//
// Effect for a thread with i < nElements (first three columns, as f64):
//   r.col0[i] = a.col1[i]*b.col2[i] - a.col2[i]*b.col1[i]
//   r.col1[i] = a.col2[i]*b.col0[i] - a.col0[i]*b.col2[i]
//   r.col2[i] = a.col0[i]*b.col1[i] - a.col1[i]*b.col0[i]
// Each result is stored before the operands of the next one are loaded.
//
// Local frame (__local_depot5, 712 bytes, addressed through %SP):
//   [  0.. 39]  scratch for building a[i]   (0: col5 ptr, 8: &elem, 16: col5 ptr,
//                                            24/32: &a copy)
//   [ 40.. 87]  a[i] element under construction (six addresses)
//   [ 88..127]  scratch for building b[i]   (88, 96, 104, 112, 120 as above)
//   [128..175]  b[i] element under construction (six addresses)
//   [176..199]  crossProduct arguments: &ri, &a[i], &b[i]
//   [200..223]  f64 temporaries for the x, y, z results
//   [224..263]  scratch for building r[i]   (224, 232, 240, 248, 256 as above)
//   [264..311]  r[i] element under construction (six addresses)
//   [312..391]  copy of parameter r
//   [392..471]  copy of parameter a
//   [472..551]  copy of parameter b
//   [552..599]  a[i] temporary,  [600] pointer to it
//   [608..655]  b[i] temporary,  [656] pointer to it
//   [664..711]  ri (result of r[i])
//
// The many "$L__tmp…" labels and ".loc" directives are debug line markers; the
// "bra.uni $L__tmpNNN" instructions immediately followed by that label are
// fall-through jumps with no effect on control flow.
// -----------------------------------------------------------------------------
.entry _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m(
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0[80],
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1[80],
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2[80],
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3
)
{
.local .align 8 .b8 __local_depot5[712];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<66>;
.reg .b32 %r<48>;
.reg .f64 %fd<25>;
.reg .b64 %rd<312>;
//test_v7_cuda.cu:57 __global__ void indirectCrossProductSoA(testSoA::SoA r, const testSoA::SoA a, const testSoA::SoA b, size_t nElements) {
.loc 18 57 0
$L__func_begin5:
//test_v7_cuda.cu:57 __global__ void indirectCrossProductSoA(testSoA::SoA r, const testSoA::SoA a, const testSoA::SoA b, size_t nElements) {
.loc 18 57 0
// Frame setup: %SPL is the local-space address of the depot, %SP its generic
// address. All later [%SP+off] accesses go through the generic address.
mov.u64 %SPL, __local_depot5;
cvta.local.u64 %SP, %SPL;
// Read every qword of the three by-value structs into registers:
//   b (param_2) -> %rd43..%rd52, a (param_1) -> %rd33..%rd42,
//   r (param_0) -> %rd23..%rd32, nElements (param_3) -> %rd53.
ld.param.u64 %rd44, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+8];
ld.param.u64 %rd45, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+16];
ld.param.u64 %rd46, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+24];
ld.param.u64 %rd47, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+32];
ld.param.u64 %rd48, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+40];
ld.param.u64 %rd49, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+48];
ld.param.u64 %rd50, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+56];
ld.param.u64 %rd51, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+64];
ld.param.u64 %rd52, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+72];
ld.param.u64 %rd34, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+8];
ld.param.u64 %rd35, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+16];
ld.param.u64 %rd36, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+24];
ld.param.u64 %rd37, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+32];
ld.param.u64 %rd38, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+40];
ld.param.u64 %rd39, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+48];
ld.param.u64 %rd40, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+56];
ld.param.u64 %rd41, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+64];
ld.param.u64 %rd42, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+72];
ld.param.u64 %rd24, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+8];
ld.param.u64 %rd25, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+16];
ld.param.u64 %rd26, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+24];
ld.param.u64 %rd27, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+32];
ld.param.u64 %rd28, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+40];
ld.param.u64 %rd29, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+48];
ld.param.u64 %rd30, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+56];
ld.param.u64 %rd31, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+64];
ld.param.u64 %rd32, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+72];
ld.param.u64 %rd53, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3];
ld.param.u64 %rd43, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2];
ld.param.u64 %rd33, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1];
ld.param.u64 %rd23, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe23indirectCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0];
// Spill parameter r (10 qwords) to frame bytes 312..391.
st.u64 [%SP+384], %rd32;
st.u64 [%SP+376], %rd31;
st.u64 [%SP+368], %rd30;
st.u64 [%SP+360], %rd29;
st.u64 [%SP+352], %rd28;
st.u64 [%SP+344], %rd27;
st.u64 [%SP+336], %rd26;
st.u64 [%SP+328], %rd25;
st.u64 [%SP+320], %rd24;
st.u64 [%SP+312], %rd23;
// Spill parameter a (10 qwords) to frame bytes 392..471.
st.u64 [%SP+464], %rd42;
st.u64 [%SP+456], %rd41;
st.u64 [%SP+448], %rd40;
st.u64 [%SP+440], %rd39;
st.u64 [%SP+432], %rd38;
st.u64 [%SP+424], %rd37;
st.u64 [%SP+416], %rd36;
st.u64 [%SP+408], %rd35;
st.u64 [%SP+400], %rd34;
st.u64 [%SP+392], %rd33;
// Spill parameter b (10 qwords) to frame bytes 472..551.
st.u64 [%SP+544], %rd52;
st.u64 [%SP+536], %rd51;
st.u64 [%SP+528], %rd50;
st.u64 [%SP+520], %rd49;
st.u64 [%SP+512], %rd48;
st.u64 [%SP+504], %rd47;
st.u64 [%SP+496], %rd46;
st.u64 [%SP+488], %rd45;
st.u64 [%SP+480], %rd44;
st.u64 [%SP+472], %rd43;
$L__tmp377:
//test_v7_cuda.cu:58 size_t i = blockIdx.x * blockDim.x + threadIdx.x;
.loc 18 58 14
// %rd1 = i. NOTE(review): the product and the sum are formed in 32-bit
// registers (mul.lo.s32 / add.s32) and only then zero-extended to 64 bits, so
// the index wraps modulo 2^32 before it is widened.
mov.u32 %r1, %ctaid.x;
mov.u32 %r2, %ntid.x;
mul.lo.s32 %r3, %r1, %r2;
mov.u32 %r4, %tid.x;
add.s32 %r5, %r3, %r4;
cvt.u64.u32 %rd1, %r5;
$L__tmp378:
//test_v7_cuda.cu:59 if (i >= nElements) return;
.loc 18 59 5
// Unsigned bounds check: threads with i >= nElements fall into $L__BB5_1 and
// jump straight to the exit block; the others continue at $L__BB5_2.
setp.ge.u64 %p1, %rd1, %rd53;
not.pred %p2, %p1;
@%p2 bra $L__BB5_2;
bra.uni $L__BB5_1;
$L__BB5_1:
$L__tmp379:
//test_v7_cuda.cu:59 if (i >= nElements) return;
.loc 18 59 25
bra.uni $L__BB5_45;
$L__tmp380:
$L__BB5_2:
.loc 18 0 25
// ---- r[i] --------------------------------------------------------------
// [%SP+256] = address of the local copy of r (frame offset 312); %rd2 = i.
add.u64 %rd54, %SP, 312;
mov.b64 %rd55, %rd54;
st.u64 [%SP+256], %rd55;
mov.b64 %rd2, %rd1;
$L__tmp381:
//test_v7_cuda.cu:62 auto ri = r[i];
.loc 18 62 15
bra.uni $L__tmp382;
$L__tmp382:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6763
ld.u64 %rd56, [%SP+256];
mov.b64 %rd57, %rd56;
st.u64 [%SP+248], %rd57;
mov.b64 %rd58, %rd2;
$L__tmp383:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6763
bra.uni $L__tmp384;
$L__tmp384:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
// Null test on the struct pointer. Both outcomes reach $L__BB5_4; the
// "pointer is null" path ($L__BB5_3) only zeroes scratch registers that are
// not read afterwards, so the test has no visible effect. The same pattern
// recurs before every pointer use below (presumably a debug-build remnant).
ld.u64 %rd59, [%SP+248];
setp.ne.s64 %p3, %rd59, 0;
not.pred %p4, %p3;
not.pred %p5, %p4;
@%p5 bra $L__BB5_4;
bra.uni $L__BB5_3;
$L__BB5_3:
mov.u32 %r6, 0;
mov.b32 %r7, %r6;
bra.uni $L__BB5_4;
$L__tmp385:
$L__BB5_4:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6782
// Fetch the six column base pointers of r (struct offsets 24..64).
ld.u64 %rd60, [%SP+256];
ld.u64 %rd61, [%rd60+24];
ld.u64 %rd62, [%SP+256];
ld.u64 %rd63, [%rd62+32];
ld.u64 %rd64, [%SP+256];
ld.u64 %rd65, [%rd64+40];
ld.u64 %rd66, [%SP+256];
ld.u64 %rd67, [%rd66+48];
ld.u64 %rd68, [%SP+256];
ld.u64 %rd69, [%rd68+56];
ld.u64 %rd70, [%SP+256];
ld.u64 %rd71, [%rd70+64];
// [%SP+232] = address of the element object being built at frame offset 264.
add.u64 %rd72, %SP, 264;
mov.b64 %rd73, %rd72;
st.u64 [%SP+232], %rd73;
mov.b64 %rd74, %rd2;
$L__tmp386:
.loc 11 0 6782
mov.b64 %rd75, %rd61;
$L__tmp387:
mov.b64 %rd76, %rd63;
$L__tmp388:
mov.b64 %rd77, %rd65;
$L__tmp389:
mov.b64 %rd78, %rd67;
$L__tmp390:
mov.b64 %rd79, %rd69;
$L__tmp391:
// The sixth column pointer is kept in the frame ([%SP+240]) instead of a register.
mov.b64 %rd80, %rd71;
st.u64 [%SP+240], %rd80;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6782
bra.uni $L__tmp392;
$L__tmp392:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd81, [%SP+232];
$L__tmp393:
.loc 11 0 5739
mov.b64 %rd82, %rd74;
$L__tmp394:
mov.b64 %rd83, %rd75;
$L__tmp395:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp396;
$L__tmp396:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[0] = col0 + i*8  (the element stores the ADDRESS of col[i], not its value)
shl.b64 %rd84, %rd82, 3;
add.s64 %rd85, %rd83, %rd84;
st.u64 [%rd81], %rd85;
$L__tmp397:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd86, [%SP+232];
mov.b64 %rd87, %rd74;
$L__tmp398:
.loc 11 0 5752
mov.b64 %rd88, %rd76;
$L__tmp399:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp400;
$L__tmp400:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[1] = col1 + i*8
shl.b64 %rd89, %rd87, 3;
add.s64 %rd90, %rd88, %rd89;
st.u64 [%rd86+8], %rd90;
$L__tmp401:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd91, [%SP+232];
mov.b64 %rd92, %rd74;
$L__tmp402:
.loc 11 0 5765
mov.b64 %rd93, %rd77;
$L__tmp403:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp404;
$L__tmp404:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[2] = col2 + i*8
shl.b64 %rd94, %rd92, 3;
add.s64 %rd95, %rd93, %rd94;
st.u64 [%rd91+16], %rd95;
$L__tmp405:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd96, [%SP+232];
mov.b64 %rd97, %rd74;
$L__tmp406:
.loc 11 0 5778
mov.b64 %rd98, %rd78;
$L__tmp407:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp408;
$L__tmp408:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[3] = col3 + i*2  (this column is indexed with a 2-byte stride)
shl.b64 %rd99, %rd97, 1;
add.s64 %rd100, %rd98, %rd99;
st.u64 [%rd96+24], %rd100;
$L__tmp409:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd101, [%SP+232];
mov.b64 %rd102, %rd74;
$L__tmp410:
.loc 11 0 5801
mov.b64 %rd103, %rd79;
$L__tmp411:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp412;
$L__tmp412:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[4] = col4 + i*4  (this column is indexed with a 4-byte stride)
shl.b64 %rd104, %rd102, 2;
add.s64 %rd105, %rd103, %rd104;
st.u64 [%rd101+32], %rd105;
$L__tmp413:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd106, [%SP+232];
ld.u64 %rd107, [%SP+240];
mov.b64 %rd108, %rd74;
$L__tmp414:
.loc 11 0 5822
mov.b64 %rd109, %rd107;
st.u64 [%SP+224], %rd109;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp415;
$L__tmp415:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[5] = col5 + i*8  (col5 is round-tripped through [%SP+224])
ld.u64 %rd110, [%SP+224];
shl.b64 %rd111, %rd108, 3;
add.s64 %rd112, %rd110, %rd111;
st.u64 [%rd106+40], %rd112;
$L__tmp416:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6782
// No-op null test (see the note at the first occurrence).
ld.u64 %rd113, [%SP+256];
setp.ne.s64 %p6, %rd113, 0;
not.pred %p7, %p6;
not.pred %p8, %p7;
@%p8 bra $L__BB5_6;
bra.uni $L__BB5_5;
$L__BB5_5:
mov.u32 %r8, 0;
mov.b32 %r9, %r8;
bra.uni $L__BB5_6;
$L__BB5_6:
// Copy the finished element (frame 264..311) into ri at frame 664..711.
ld.u64 %rd114, [%SP+264];
ld.u64 %rd115, [%SP+272];
ld.u64 %rd116, [%SP+280];
ld.u64 %rd117, [%SP+288];
ld.u64 %rd118, [%SP+296];
ld.u64 %rd119, [%SP+304];
$L__tmp417:
//test_v7_cuda.cu:62 auto ri = r[i];
.loc 18 62 15
st.u64 [%SP+704], %rd119;
st.u64 [%SP+696], %rd118;
st.u64 [%SP+688], %rd117;
st.u64 [%SP+680], %rd116;
st.u64 [%SP+672], %rd115;
st.u64 [%SP+664], %rd114;
// ---- a[i] --------------------------------------------------------------
// Same sequence as for r[i], using the local copy of a (frame offset 392),
// scratch slots 0..39 and the element object at frame offset 40.
add.u64 %rd120, %SP, 392;
mov.b64 %rd121, %rd120;
st.u64 [%SP+32], %rd121;
mov.b64 %rd3, %rd1;
$L__tmp418:
//test_v7_cuda.cu:63 crossProduct(ri, a[i], b[i]);
.loc 18 63 22
bra.uni $L__tmp419;
$L__tmp419:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd122, [%SP+32];
mov.b64 %rd123, %rd122;
st.u64 [%SP+24], %rd123;
mov.b64 %rd124, %rd3;
$L__tmp420:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp421;
$L__tmp421:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
// No-op null test.
ld.u64 %rd125, [%SP+24];
setp.ne.s64 %p9, %rd125, 0;
not.pred %p10, %p9;
not.pred %p11, %p10;
@%p11 bra $L__BB5_8;
bra.uni $L__BB5_7;
$L__BB5_7:
mov.u32 %r10, 0;
mov.b32 %r11, %r10;
bra.uni $L__BB5_8;
$L__tmp422:
$L__BB5_8:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
// Fetch the six column base pointers of a (struct offsets 24..64).
ld.u64 %rd126, [%SP+32];
ld.u64 %rd127, [%rd126+24];
ld.u64 %rd128, [%SP+32];
ld.u64 %rd129, [%rd128+32];
ld.u64 %rd130, [%SP+32];
ld.u64 %rd131, [%rd130+40];
ld.u64 %rd132, [%SP+32];
ld.u64 %rd133, [%rd132+48];
ld.u64 %rd134, [%SP+32];
ld.u64 %rd135, [%rd134+56];
ld.u64 %rd136, [%SP+32];
ld.u64 %rd137, [%rd136+64];
// [%SP+8] = address of the element object being built at frame offset 40.
add.u64 %rd138, %SP, 40;
mov.b64 %rd139, %rd138;
st.u64 [%SP+8], %rd139;
mov.b64 %rd140, %rd3;
$L__tmp423:
.loc 11 0 6996
mov.b64 %rd141, %rd127;
$L__tmp424:
mov.b64 %rd142, %rd129;
$L__tmp425:
mov.b64 %rd143, %rd131;
$L__tmp426:
mov.b64 %rd144, %rd133;
$L__tmp427:
mov.b64 %rd145, %rd135;
$L__tmp428:
mov.b64 %rd146, %rd137;
st.u64 [%SP+16], %rd146;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp429;
$L__tmp429:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd147, [%SP+8];
$L__tmp430:
.loc 11 0 5739
mov.b64 %rd148, %rd140;
$L__tmp431:
mov.b64 %rd149, %rd141;
$L__tmp432:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp433;
$L__tmp433:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[0] = col0 + i*8
shl.b64 %rd150, %rd148, 3;
add.s64 %rd151, %rd149, %rd150;
st.u64 [%rd147], %rd151;
$L__tmp434:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd152, [%SP+8];
mov.b64 %rd153, %rd140;
$L__tmp435:
.loc 11 0 5752
mov.b64 %rd154, %rd142;
$L__tmp436:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp437;
$L__tmp437:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[1] = col1 + i*8
shl.b64 %rd155, %rd153, 3;
add.s64 %rd156, %rd154, %rd155;
st.u64 [%rd152+8], %rd156;
$L__tmp438:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd157, [%SP+8];
mov.b64 %rd158, %rd140;
$L__tmp439:
.loc 11 0 5765
mov.b64 %rd159, %rd143;
$L__tmp440:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp441;
$L__tmp441:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[2] = col2 + i*8
shl.b64 %rd160, %rd158, 3;
add.s64 %rd161, %rd159, %rd160;
st.u64 [%rd157+16], %rd161;
$L__tmp442:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd162, [%SP+8];
mov.b64 %rd163, %rd140;
$L__tmp443:
.loc 11 0 5778
mov.b64 %rd164, %rd144;
$L__tmp444:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp445;
$L__tmp445:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[3] = col3 + i*2
shl.b64 %rd165, %rd163, 1;
add.s64 %rd166, %rd164, %rd165;
st.u64 [%rd162+24], %rd166;
$L__tmp446:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd167, [%SP+8];
mov.b64 %rd168, %rd140;
$L__tmp447:
.loc 11 0 5801
mov.b64 %rd169, %rd145;
$L__tmp448:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp449;
$L__tmp449:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[4] = col4 + i*4
shl.b64 %rd170, %rd168, 2;
add.s64 %rd171, %rd169, %rd170;
st.u64 [%rd167+32], %rd171;
$L__tmp450:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd172, [%SP+8];
ld.u64 %rd173, [%SP+16];
mov.b64 %rd174, %rd140;
$L__tmp451:
.loc 11 0 5822
mov.b64 %rd175, %rd173;
st.u64 [%SP+0], %rd175;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp452;
$L__tmp452:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[5] = col5 + i*8  (col5 is round-tripped through [%SP+0])
ld.u64 %rd176, [%SP+0];
shl.b64 %rd177, %rd174, 3;
add.s64 %rd178, %rd176, %rd177;
st.u64 [%rd172+40], %rd178;
$L__tmp453:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
// No-op null test.
ld.u64 %rd179, [%SP+32];
setp.ne.s64 %p12, %rd179, 0;
not.pred %p13, %p12;
not.pred %p14, %p13;
@%p14 bra $L__BB5_10;
bra.uni $L__BB5_9;
$L__BB5_9:
mov.u32 %r12, 0;
mov.b32 %r13, %r12;
bra.uni $L__BB5_10;
$L__BB5_10:
// Copy the finished a[i] element (frame 40..87) to the temporary at frame
// 552..599 and record its address in [%SP+600].
ld.u64 %rd180, [%SP+40];
ld.u64 %rd181, [%SP+48];
ld.u64 %rd182, [%SP+56];
ld.u64 %rd183, [%SP+64];
ld.u64 %rd184, [%SP+72];
ld.u64 %rd185, [%SP+80];
$L__tmp454:
//test_v7_cuda.cu:63 crossProduct(ri, a[i], b[i]);
.loc 18 63 22
st.u64 [%SP+592], %rd185;
st.u64 [%SP+584], %rd184;
st.u64 [%SP+576], %rd183;
st.u64 [%SP+568], %rd182;
st.u64 [%SP+560], %rd181;
st.u64 [%SP+552], %rd180;
add.u64 %rd186, %SP, 552;
mov.b64 %rd187, %rd186;
st.u64 [%SP+600], %rd187;
// ---- b[i] --------------------------------------------------------------
// Same sequence again, using the local copy of b (frame offset 472), scratch
// slots 88..127 and the element object at frame offset 128.
add.u64 %rd188, %SP, 472;
mov.b64 %rd189, %rd188;
st.u64 [%SP+120], %rd189;
mov.b64 %rd4, %rd1;
$L__tmp455:
//test_v7_cuda.cu:63 crossProduct(ri, a[i], b[i]);
.loc 18 63 28
bra.uni $L__tmp456;
$L__tmp456:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd190, [%SP+120];
mov.b64 %rd191, %rd190;
st.u64 [%SP+112], %rd191;
mov.b64 %rd192, %rd4;
$L__tmp457:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp458;
$L__tmp458:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
// No-op null test.
ld.u64 %rd193, [%SP+112];
setp.ne.s64 %p15, %rd193, 0;
not.pred %p16, %p15;
not.pred %p17, %p16;
@%p17 bra $L__BB5_12;
bra.uni $L__BB5_11;
$L__BB5_11:
mov.u32 %r14, 0;
mov.b32 %r15, %r14;
bra.uni $L__BB5_12;
$L__tmp459:
$L__BB5_12:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
// Fetch the six column base pointers of b (struct offsets 24..64).
ld.u64 %rd194, [%SP+120];
ld.u64 %rd195, [%rd194+24];
ld.u64 %rd196, [%SP+120];
ld.u64 %rd197, [%rd196+32];
ld.u64 %rd198, [%SP+120];
ld.u64 %rd199, [%rd198+40];
ld.u64 %rd200, [%SP+120];
ld.u64 %rd201, [%rd200+48];
ld.u64 %rd202, [%SP+120];
ld.u64 %rd203, [%rd202+56];
ld.u64 %rd204, [%SP+120];
ld.u64 %rd205, [%rd204+64];
// [%SP+96] = address of the element object being built at frame offset 128.
add.u64 %rd206, %SP, 128;
mov.b64 %rd207, %rd206;
st.u64 [%SP+96], %rd207;
mov.b64 %rd208, %rd4;
$L__tmp460:
.loc 11 0 6996
mov.b64 %rd209, %rd195;
$L__tmp461:
mov.b64 %rd210, %rd197;
$L__tmp462:
mov.b64 %rd211, %rd199;
$L__tmp463:
mov.b64 %rd212, %rd201;
$L__tmp464:
mov.b64 %rd213, %rd203;
$L__tmp465:
mov.b64 %rd214, %rd205;
st.u64 [%SP+104], %rd214;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp466;
$L__tmp466:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd215, [%SP+96];
$L__tmp467:
.loc 11 0 5739
mov.b64 %rd216, %rd208;
$L__tmp468:
mov.b64 %rd217, %rd209;
$L__tmp469:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp470;
$L__tmp470:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[0] = col0 + i*8
shl.b64 %rd218, %rd216, 3;
add.s64 %rd219, %rd217, %rd218;
st.u64 [%rd215], %rd219;
$L__tmp471:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd220, [%SP+96];
mov.b64 %rd221, %rd208;
$L__tmp472:
.loc 11 0 5752
mov.b64 %rd222, %rd210;
$L__tmp473:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp474;
$L__tmp474:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[1] = col1 + i*8
shl.b64 %rd223, %rd221, 3;
add.s64 %rd224, %rd222, %rd223;
st.u64 [%rd220+8], %rd224;
$L__tmp475:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd225, [%SP+96];
mov.b64 %rd226, %rd208;
$L__tmp476:
.loc 11 0 5765
mov.b64 %rd227, %rd211;
$L__tmp477:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp478;
$L__tmp478:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[2] = col2 + i*8
shl.b64 %rd228, %rd226, 3;
add.s64 %rd229, %rd227, %rd228;
st.u64 [%rd225+16], %rd229;
$L__tmp479:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd230, [%SP+96];
mov.b64 %rd231, %rd208;
$L__tmp480:
.loc 11 0 5778
mov.b64 %rd232, %rd212;
$L__tmp481:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp482;
$L__tmp482:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[3] = col3 + i*2
shl.b64 %rd233, %rd231, 1;
add.s64 %rd234, %rd232, %rd233;
st.u64 [%rd230+24], %rd234;
$L__tmp483:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd235, [%SP+96];
mov.b64 %rd236, %rd208;
$L__tmp484:
.loc 11 0 5801
mov.b64 %rd237, %rd213;
$L__tmp485:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp486;
$L__tmp486:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[4] = col4 + i*4
shl.b64 %rd238, %rd236, 2;
add.s64 %rd239, %rd237, %rd238;
st.u64 [%rd235+32], %rd239;
$L__tmp487:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd240, [%SP+96];
ld.u64 %rd241, [%SP+104];
mov.b64 %rd242, %rd208;
$L__tmp488:
.loc 11 0 5822
mov.b64 %rd243, %rd241;
st.u64 [%SP+88], %rd243;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp489;
$L__tmp489:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
// elem[5] = col5 + i*8  (col5 is round-tripped through [%SP+88])
ld.u64 %rd244, [%SP+88];
shl.b64 %rd245, %rd242, 3;
add.s64 %rd246, %rd244, %rd245;
st.u64 [%rd240+40], %rd246;
$L__tmp490:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
// No-op null test.
ld.u64 %rd247, [%SP+120];
setp.ne.s64 %p18, %rd247, 0;
not.pred %p19, %p18;
not.pred %p20, %p19;
@%p20 bra $L__BB5_14;
bra.uni $L__BB5_13;
$L__BB5_13:
mov.u32 %r16, 0;
mov.b32 %r17, %r16;
bra.uni $L__BB5_14;
$L__BB5_14:
// Copy the finished b[i] element (frame 128..175) to the temporary at frame
// 608..655 and record its address in [%SP+656].
ld.u64 %rd248, [%SP+128];
ld.u64 %rd249, [%SP+136];
ld.u64 %rd250, [%SP+144];
ld.u64 %rd251, [%SP+152];
ld.u64 %rd252, [%SP+160];
ld.u64 %rd253, [%SP+168];
$L__tmp491:
//test_v7_cuda.cu:63 crossProduct(ri, a[i], b[i]);
.loc 18 63 28
st.u64 [%SP+648], %rd253;
st.u64 [%SP+640], %rd252;
st.u64 [%SP+632], %rd251;
st.u64 [%SP+624], %rd250;
st.u64 [%SP+616], %rd249;
st.u64 [%SP+608], %rd248;
add.u64 %rd254, %SP, 608;
mov.b64 %rd255, %rd254;
st.u64 [%SP+656], %rd255;
// ---- crossProduct(ri, a[i], b[i]), inlined ------------------------------
// Argument slots: [%SP+176] = &ri (frame 664), [%SP+184] = &a[i] temporary,
// [%SP+192] = &b[i] temporary. %rd256 is loaded but not used afterwards.
ld.u64 %rd256, [%SP+656];
ld.u64 %rd257, [%SP+600];
ld.u64 %rd258, [%SP+656];
add.u64 %rd259, %SP, 664;
mov.b64 %rd260, %rd259;
st.u64 [%SP+176], %rd260;
mov.b64 %rd261, %rd257;
st.u64 [%SP+184], %rd261;
mov.b64 %rd262, %rd258;
st.u64 [%SP+192], %rd262;
//test_v7_cuda.cu:63 crossProduct(ri, a[i], b[i]);
.loc 18 63 5
bra.uni $L__tmp492;
$L__tmp492:
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y;
.loc 18 51 5
// Each operand read below is a double indirection: load the stored address
// from the element object (offset 0 = x, 8 = y, 16 = z), then load the f64
// it points to. %rd5 = &a.y
ld.u64 %rd263, [%SP+184];
add.s64 %rd5, %rd263, 8;
$L__tmp493:
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y;
.loc 18 51 5
bra.uni $L__tmp494;
$L__tmp494:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
setp.ne.s64 %p21, %rd5, 0;
not.pred %p22, %p21;
not.pred %p23, %p22;
@%p23 bra $L__BB5_16;
bra.uni $L__BB5_15;
$L__BB5_15:
mov.u32 %r18, 0;
mov.b32 %r19, %r18;
bra.uni $L__BB5_16;
$L__BB5_16:
ld.u64 %rd264, [%rd5];
mov.b64 %rd265, %rd264;
$L__tmp495:
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y;
.loc 18 51 5
// %fd1 = a.y ; %rd6 = &b.z
ld.f64 %fd1, [%rd265];
ld.u64 %rd266, [%SP+192];
add.s64 %rd6, %rd266, 16;
$L__tmp496:
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y;
.loc 18 51 5
bra.uni $L__tmp497;
$L__tmp497:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
setp.ne.s64 %p24, %rd6, 0;
not.pred %p25, %p24;
not.pred %p26, %p25;
@%p26 bra $L__BB5_18;
bra.uni $L__BB5_17;
$L__BB5_17:
mov.u32 %r20, 0;
mov.b32 %r21, %r20;
bra.uni $L__BB5_18;
$L__BB5_18:
ld.u64 %rd267, [%rd6];
mov.b64 %rd268, %rd267;
$L__tmp498:
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y;
.loc 18 51 5
// %fd2 = a.y * b.z ; %rd7 = &a.z
ld.f64 %fd10, [%rd268];
mul.f64 %fd2, %fd1, %fd10;
ld.u64 %rd269, [%SP+184];
add.s64 %rd7, %rd269, 16;
$L__tmp499:
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y;
.loc 18 51 5
bra.uni $L__tmp500;
$L__tmp500:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
setp.ne.s64 %p27, %rd7, 0;
not.pred %p28, %p27;
not.pred %p29, %p28;
@%p29 bra $L__BB5_20;
bra.uni $L__BB5_19;
$L__BB5_19:
mov.u32 %r22, 0;
mov.b32 %r23, %r22;
bra.uni $L__BB5_20;
$L__BB5_20:
ld.u64 %rd270, [%rd7];
mov.b64 %rd271, %rd270;
$L__tmp501:
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y;
.loc 18 51 5
// %fd3 = a.z ; %rd8 = &b.y
ld.f64 %fd3, [%rd271];
ld.u64 %rd272, [%SP+192];
add.s64 %rd8, %rd272, 8;
$L__tmp502:
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y;
.loc 18 51 5
bra.uni $L__tmp503;
$L__tmp503:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
setp.ne.s64 %p30, %rd8, 0;
not.pred %p31, %p30;
not.pred %p32, %p31;
@%p32 bra $L__BB5_22;
bra.uni $L__BB5_21;
$L__BB5_21:
mov.u32 %r24, 0;
mov.b32 %r25, %r24;
bra.uni $L__BB5_22;
$L__BB5_22:
ld.u64 %rd273, [%rd8];
mov.b64 %rd274, %rd273;
$L__tmp504:
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y;
.loc 18 51 5
// %fd13 = a.y*b.z - a.z*b.y, parked in the frame temporary at [%SP+200];
// %rd9 = &ri.x (element offset 0), %rd10 = address of the temporary.
ld.f64 %fd11, [%rd274];
mul.f64 %fd12, %fd3, %fd11;
sub.f64 %fd13, %fd2, %fd12;
st.f64 [%SP+200], %fd13;
add.u64 %rd275, %SP, 200;
mov.b64 %rd276, %rd275;
ld.u64 %rd9, [%SP+176];
mov.b64 %rd10, %rd276;
$L__tmp505:
//test_v7_cuda.cu:51 r.x = a.y * b.z - a.z * b.y;
.loc 18 51 5
bra.uni $L__tmp506;
$L__tmp506:
//soa_v7.h:35 SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return val_ = v; }
.loc 12 35 120
setp.ne.s64 %p33, %rd9, 0;
not.pred %p34, %p33;
not.pred %p35, %p34;
@%p35 bra $L__BB5_24;
bra.uni $L__BB5_23;
$L__BB5_23:
mov.u32 %r26, 0;
mov.b32 %r27, %r26;
bra.uni $L__BB5_24;
$L__BB5_24:
// Reload the temporary and store it through the address held in ri.x.
// This store happens before any operand of r.y is loaded.
ld.f64 %fd14, [%rd10];
ld.u64 %rd277, [%rd9];
mov.b64 %rd278, %rd277;
st.f64 [%rd278], %fd14;
mov.b64 %rd279, %rd278;
$L__tmp507:
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z;
.loc 18 52 5
// %rd11 = &a.z
ld.u64 %rd280, [%SP+184];
add.s64 %rd11, %rd280, 16;
$L__tmp508:
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z;
.loc 18 52 5
bra.uni $L__tmp509;
$L__tmp509:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
setp.ne.s64 %p36, %rd11, 0;
not.pred %p37, %p36;
not.pred %p38, %p37;
@%p38 bra $L__BB5_26;
bra.uni $L__BB5_25;
$L__BB5_25:
mov.u32 %r28, 0;
mov.b32 %r29, %r28;
bra.uni $L__BB5_26;
$L__BB5_26:
ld.u64 %rd281, [%rd11];
mov.b64 %rd282, %rd281;
$L__tmp510:
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z;
.loc 18 52 5
// %fd4 = a.z ; %rd12 = &b.x (element offset 0)
ld.f64 %fd4, [%rd282];
ld.u64 %rd12, [%SP+192];
$L__tmp511:
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z;
.loc 18 52 5
bra.uni $L__tmp512;
$L__tmp512:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
setp.ne.s64 %p39, %rd12, 0;
not.pred %p40, %p39;
not.pred %p41, %p40;
@%p41 bra $L__BB5_28;
bra.uni $L__BB5_27;
$L__BB5_27:
mov.u32 %r30, 0;
mov.b32 %r31, %r30;
bra.uni $L__BB5_28;
$L__BB5_28:
ld.u64 %rd283, [%rd12];
mov.b64 %rd284, %rd283;
$L__tmp513:
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z;
.loc 18 52 5
// %fd5 = a.z * b.x ; %rd13 = &a.x (element offset 0)
ld.f64 %fd15, [%rd284];
mul.f64 %fd5, %fd4, %fd15;
ld.u64 %rd13, [%SP+184];
$L__tmp514:
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z;
.loc 18 52 5
bra.uni $L__tmp515;
$L__tmp515:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
setp.ne.s64 %p42, %rd13, 0;
not.pred %p43, %p42;
not.pred %p44, %p43;
@%p44 bra $L__BB5_30;
bra.uni $L__BB5_29;
$L__BB5_29:
mov.u32 %r32, 0;
mov.b32 %r33, %r32;
bra.uni $L__BB5_30;
$L__BB5_30:
ld.u64 %rd285, [%rd13];
mov.b64 %rd286, %rd285;
$L__tmp516:
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z;
.loc 18 52 5
// %fd6 = a.x ; %rd14 = &b.z
ld.f64 %fd6, [%rd286];
ld.u64 %rd287, [%SP+192];
add.s64 %rd14, %rd287, 16;
$L__tmp517:
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z;
.loc 18 52 5
bra.uni $L__tmp518;
$L__tmp518:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
setp.ne.s64 %p45, %rd14, 0;
not.pred %p46, %p45;
not.pred %p47, %p46;
@%p47 bra $L__BB5_32;
bra.uni $L__BB5_31;
$L__BB5_31:
mov.u32 %r34, 0;
mov.b32 %r35, %r34;
bra.uni $L__BB5_32;
$L__BB5_32:
ld.u64 %rd288, [%rd14];
mov.b64 %rd289, %rd288;
$L__tmp519:
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z;
.loc 18 52 5
// %fd18 = a.z*b.x - a.x*b.z, parked at [%SP+208]; %rd15 = &ri.y (offset 8).
ld.f64 %fd16, [%rd289];
mul.f64 %fd17, %fd6, %fd16;
sub.f64 %fd18, %fd5, %fd17;
st.f64 [%SP+208], %fd18;
add.u64 %rd290, %SP, 208;
mov.b64 %rd291, %rd290;
ld.u64 %rd292, [%SP+176];
add.s64 %rd15, %rd292, 8;
$L__tmp520:
.loc 18 0 5
mov.b64 %rd16, %rd291;
$L__tmp521:
//test_v7_cuda.cu:52 r.y = a.z * b.x - a.x * b.z;
.loc 18 52 5
bra.uni $L__tmp522;
$L__tmp522:
//soa_v7.h:35 SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return val_ = v; }
.loc 12 35 120
setp.ne.s64 %p48, %rd15, 0;
not.pred %p49, %p48;
not.pred %p50, %p49;
@%p50 bra $L__BB5_34;
bra.uni $L__BB5_33;
$L__BB5_33:
mov.u32 %r36, 0;
mov.b32 %r37, %r36;
bra.uni $L__BB5_34;
$L__BB5_34:
// Store the result through the address held in ri.y, before r.z's operands
// are loaded.
ld.f64 %fd19, [%rd16];
ld.u64 %rd293, [%rd15];
mov.b64 %rd294, %rd293;
st.f64 [%rd294], %fd19;
mov.b64 %rd295, %rd294;
$L__tmp523:
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x;
.loc 18 53 5
// %rd17 = &a.x (element offset 0)
ld.u64 %rd17, [%SP+184];
$L__tmp524:
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x;
.loc 18 53 5
bra.uni $L__tmp525;
$L__tmp525:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
setp.ne.s64 %p51, %rd17, 0;
not.pred %p52, %p51;
not.pred %p53, %p52;
@%p53 bra $L__BB5_36;
bra.uni $L__BB5_35;
$L__BB5_35:
mov.u32 %r38, 0;
mov.b32 %r39, %r38;
bra.uni $L__BB5_36;
$L__BB5_36:
ld.u64 %rd296, [%rd17];
mov.b64 %rd297, %rd296;
$L__tmp526:
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x;
.loc 18 53 5
// %fd7 = a.x ; %rd18 = &b.y
ld.f64 %fd7, [%rd297];
ld.u64 %rd298, [%SP+192];
add.s64 %rd18, %rd298, 8;
$L__tmp527:
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x;
.loc 18 53 5
bra.uni $L__tmp528;
$L__tmp528:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
setp.ne.s64 %p54, %rd18, 0;
not.pred %p55, %p54;
not.pred %p56, %p55;
@%p56 bra $L__BB5_38;
bra.uni $L__BB5_37;
$L__BB5_37:
mov.u32 %r40, 0;
mov.b32 %r41, %r40;
bra.uni $L__BB5_38;
$L__BB5_38:
ld.u64 %rd299, [%rd18];
mov.b64 %rd300, %rd299;
$L__tmp529:
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x;
.loc 18 53 5
// %fd8 = a.x * b.y ; %rd19 = &a.y
ld.f64 %fd20, [%rd300];
mul.f64 %fd8, %fd7, %fd20;
ld.u64 %rd301, [%SP+184];
add.s64 %rd19, %rd301, 8;
$L__tmp530:
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x;
.loc 18 53 5
bra.uni $L__tmp531;
$L__tmp531:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
setp.ne.s64 %p57, %rd19, 0;
not.pred %p58, %p57;
not.pred %p59, %p58;
@%p59 bra $L__BB5_40;
bra.uni $L__BB5_39;
$L__BB5_39:
mov.u32 %r42, 0;
mov.b32 %r43, %r42;
bra.uni $L__BB5_40;
$L__BB5_40:
ld.u64 %rd302, [%rd19];
mov.b64 %rd303, %rd302;
$L__tmp532:
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x;
.loc 18 53 5
// %fd9 = a.y ; %rd20 = &b.x (element offset 0)
ld.f64 %fd9, [%rd303];
ld.u64 %rd20, [%SP+192];
$L__tmp533:
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x;
.loc 18 53 5
bra.uni $L__tmp534;
$L__tmp534:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
setp.ne.s64 %p60, %rd20, 0;
not.pred %p61, %p60;
not.pred %p62, %p61;
@%p62 bra $L__BB5_42;
bra.uni $L__BB5_41;
$L__BB5_41:
mov.u32 %r44, 0;
mov.b32 %r45, %r44;
bra.uni $L__BB5_42;
$L__BB5_42:
ld.u64 %rd304, [%rd20];
mov.b64 %rd305, %rd304;
$L__tmp535:
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x;
.loc 18 53 5
// %fd23 = a.x*b.y - a.y*b.x, parked at [%SP+216]; %rd21 = &ri.z (offset 16).
ld.f64 %fd21, [%rd305];
mul.f64 %fd22, %fd9, %fd21;
sub.f64 %fd23, %fd8, %fd22;
st.f64 [%SP+216], %fd23;
add.u64 %rd306, %SP, 216;
mov.b64 %rd307, %rd306;
ld.u64 %rd308, [%SP+176];
add.s64 %rd21, %rd308, 16;
$L__tmp536:
.loc 18 0 5
mov.b64 %rd22, %rd307;
$L__tmp537:
//test_v7_cuda.cu:53 r.z = a.x * b.y - a.y * b.x;
.loc 18 53 5
bra.uni $L__tmp538;
$L__tmp538:
//soa_v7.h:35 SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return val_ = v; }
.loc 12 35 120
setp.ne.s64 %p63, %rd21, 0;
not.pred %p64, %p63;
not.pred %p65, %p64;
@%p65 bra $L__BB5_44;
bra.uni $L__BB5_43;
$L__BB5_43:
mov.u32 %r46, 0;
mov.b32 %r47, %r46;
bra.uni $L__BB5_44;
$L__BB5_44:
// Store the result through the address held in ri.z.
ld.f64 %fd24, [%rd22];
ld.u64 %rd309, [%rd21];
mov.b64 %rd310, %rd309;
st.f64 [%rd310], %fd24;
mov.b64 %rd311, %rd310;
$L__tmp539:
//test_v7_cuda.cu:64 }
.loc 18 64 3
bra.uni $L__BB5_45;
// Common exit, reached both by out-of-range threads and after the stores.
$L__BB5_45:
ret;
$L__tmp540:
$L__func_end5:
}
.entry _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m(
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0[80],
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1[80],
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2[80],
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3
)
{
.local .align 8 .b8 __local_depot6[2544];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<138>;
.reg .b32 %r<96>;
.reg .f64 %fd<25>;
.reg .b64 %rd<1155>;
//test_v7_cuda.cu:67 __global__ void directCrossProductSoA(testSoA::SoA r, const testSoA::SoA a, const testSoA::SoA b, size_t nElements) {
.loc 18 67 0
$L__func_begin6:
//test_v7_cuda.cu:67 __global__ void directCrossProductSoA(testSoA::SoA r, const testSoA::SoA a, const testSoA::SoA b, size_t nElements) {
.loc 18 67 0
mov.u64 %SPL, __local_depot6;
cvta.local.u64 %SP, %SPL;
ld.param.u64 %rd44, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+8];
ld.param.u64 %rd45, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+16];
ld.param.u64 %rd46, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+24];
ld.param.u64 %rd47, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+32];
ld.param.u64 %rd48, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+40];
ld.param.u64 %rd49, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+48];
ld.param.u64 %rd50, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+56];
ld.param.u64 %rd51, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+64];
ld.param.u64 %rd52, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+72];
ld.param.u64 %rd34, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+8];
ld.param.u64 %rd35, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+16];
ld.param.u64 %rd36, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+24];
ld.param.u64 %rd37, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+32];
ld.param.u64 %rd38, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+40];
ld.param.u64 %rd39, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+48];
ld.param.u64 %rd40, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+56];
ld.param.u64 %rd41, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+64];
ld.param.u64 %rd42, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+72];
ld.param.u64 %rd24, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+8];
ld.param.u64 %rd25, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+16];
ld.param.u64 %rd26, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+24];
ld.param.u64 %rd27, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+32];
ld.param.u64 %rd28, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+40];
ld.param.u64 %rd29, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+48];
ld.param.u64 %rd30, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+56];
ld.param.u64 %rd31, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+64];
ld.param.u64 %rd32, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+72];
ld.param.u64 %rd53, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3];
ld.param.u64 %rd43, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2];
ld.param.u64 %rd33, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1];
ld.param.u64 %rd23, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe21directCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0];
st.u64 [%SP+1512], %rd32;
st.u64 [%SP+1504], %rd31;
st.u64 [%SP+1496], %rd30;
st.u64 [%SP+1488], %rd29;
st.u64 [%SP+1480], %rd28;
st.u64 [%SP+1472], %rd27;
st.u64 [%SP+1464], %rd26;
st.u64 [%SP+1456], %rd25;
st.u64 [%SP+1448], %rd24;
st.u64 [%SP+1440], %rd23;
st.u64 [%SP+1592], %rd42;
st.u64 [%SP+1584], %rd41;
st.u64 [%SP+1576], %rd40;
st.u64 [%SP+1568], %rd39;
st.u64 [%SP+1560], %rd38;
st.u64 [%SP+1552], %rd37;
st.u64 [%SP+1544], %rd36;
st.u64 [%SP+1536], %rd35;
st.u64 [%SP+1528], %rd34;
st.u64 [%SP+1520], %rd33;
st.u64 [%SP+1672], %rd52;
st.u64 [%SP+1664], %rd51;
st.u64 [%SP+1656], %rd50;
st.u64 [%SP+1648], %rd49;
st.u64 [%SP+1640], %rd48;
st.u64 [%SP+1632], %rd47;
st.u64 [%SP+1624], %rd46;
st.u64 [%SP+1616], %rd45;
st.u64 [%SP+1608], %rd44;
st.u64 [%SP+1600], %rd43;
$L__tmp541:
//test_v7_cuda.cu:68 size_t i = blockIdx.x * blockDim.x + threadIdx.x;
.loc 18 68 14
mov.u32 %r1, %ctaid.x;
mov.u32 %r2, %ntid.x;
mul.lo.s32 %r3, %r1, %r2;
mov.u32 %r4, %tid.x;
add.s32 %r5, %r3, %r4;
cvt.u64.u32 %rd1, %r5;
$L__tmp542:
//test_v7_cuda.cu:69 if (i >= nElements) return;
.loc 18 69 5
setp.ge.u64 %p1, %rd1, %rd53;
not.pred %p2, %p1;
@%p2 bra $L__BB6_2;
bra.uni $L__BB6_1;
$L__BB6_1:
$L__tmp543:
//test_v7_cuda.cu:69 if (i >= nElements) return;
.loc 18 69 25
bra.uni $L__BB6_93;
$L__tmp544:
$L__BB6_2:
.loc 18 0 25
add.u64 %rd54, %SP, 1520;
mov.b64 %rd55, %rd54;
st.u64 [%SP+1384], %rd55;
mov.b64 %rd2, %rd1;
$L__tmp545:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 14
bra.uni $L__tmp546;
$L__tmp546:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd56, [%SP+1384];
mov.b64 %rd57, %rd56;
st.u64 [%SP+1376], %rd57;
mov.b64 %rd58, %rd2;
$L__tmp547:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp548;
$L__tmp548:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd59, [%SP+1376];
setp.ne.s64 %p3, %rd59, 0;
not.pred %p4, %p3;
not.pred %p5, %p4;
@%p5 bra $L__BB6_4;
bra.uni $L__BB6_3;
$L__BB6_3:
mov.u32 %r6, 0;
mov.b32 %r7, %r6;
bra.uni $L__BB6_4;
$L__tmp549:
$L__BB6_4:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd60, [%SP+1384];
ld.u64 %rd61, [%rd60+24];
ld.u64 %rd62, [%SP+1384];
ld.u64 %rd63, [%rd62+32];
ld.u64 %rd64, [%SP+1384];
ld.u64 %rd65, [%rd64+40];
ld.u64 %rd66, [%SP+1384];
ld.u64 %rd67, [%rd66+48];
ld.u64 %rd68, [%SP+1384];
ld.u64 %rd69, [%rd68+56];
ld.u64 %rd70, [%SP+1384];
ld.u64 %rd71, [%rd70+64];
add.u64 %rd72, %SP, 1392;
mov.b64 %rd73, %rd72;
st.u64 [%SP+1360], %rd73;
mov.b64 %rd74, %rd2;
$L__tmp550:
.loc 11 0 6996
mov.b64 %rd75, %rd61;
$L__tmp551:
mov.b64 %rd76, %rd63;
$L__tmp552:
mov.b64 %rd77, %rd65;
$L__tmp553:
mov.b64 %rd78, %rd67;
$L__tmp554:
mov.b64 %rd79, %rd69;
$L__tmp555:
mov.b64 %rd80, %rd71;
st.u64 [%SP+1368], %rd80;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp556;
$L__tmp556:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd81, [%SP+1360];
$L__tmp557:
.loc 11 0 5739
mov.b64 %rd82, %rd74;
$L__tmp558:
mov.b64 %rd83, %rd75;
$L__tmp559:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp560;
$L__tmp560:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd84, %rd82, 3;
add.s64 %rd85, %rd83, %rd84;
st.u64 [%rd81], %rd85;
$L__tmp561:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd86, [%SP+1360];
mov.b64 %rd87, %rd74;
$L__tmp562:
.loc 11 0 5752
mov.b64 %rd88, %rd76;
$L__tmp563:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp564;
$L__tmp564:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd89, %rd87, 3;
add.s64 %rd90, %rd88, %rd89;
st.u64 [%rd86+8], %rd90;
$L__tmp565:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd91, [%SP+1360];
mov.b64 %rd92, %rd74;
$L__tmp566:
.loc 11 0 5765
mov.b64 %rd93, %rd77;
$L__tmp567:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp568;
$L__tmp568:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd94, %rd92, 3;
add.s64 %rd95, %rd93, %rd94;
st.u64 [%rd91+16], %rd95;
$L__tmp569:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd96, [%SP+1360];
mov.b64 %rd97, %rd74;
$L__tmp570:
.loc 11 0 5778
mov.b64 %rd98, %rd78;
$L__tmp571:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp572;
$L__tmp572:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd99, %rd97, 1;
add.s64 %rd100, %rd98, %rd99;
st.u64 [%rd96+24], %rd100;
$L__tmp573:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd101, [%SP+1360];
mov.b64 %rd102, %rd74;
$L__tmp574:
.loc 11 0 5801
mov.b64 %rd103, %rd79;
$L__tmp575:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp576;
$L__tmp576:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd104, %rd102, 2;
add.s64 %rd105, %rd103, %rd104;
st.u64 [%rd101+32], %rd105;
$L__tmp577:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd106, [%SP+1360];
ld.u64 %rd107, [%SP+1368];
mov.b64 %rd108, %rd74;
$L__tmp578:
.loc 11 0 5822
mov.b64 %rd109, %rd107;
st.u64 [%SP+1352], %rd109;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp579;
$L__tmp579:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd110, [%SP+1352];
shl.b64 %rd111, %rd108, 3;
add.s64 %rd112, %rd110, %rd111;
st.u64 [%rd106+40], %rd112;
$L__tmp580:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd113, [%SP+1384];
setp.ne.s64 %p6, %rd113, 0;
not.pred %p7, %p6;
not.pred %p8, %p7;
@%p8 bra $L__BB6_6;
bra.uni $L__BB6_5;
$L__BB6_5:
mov.u32 %r8, 0;
mov.b32 %r9, %r8;
bra.uni $L__BB6_6;
$L__BB6_6:
ld.u64 %rd114, [%SP+1392];
ld.u64 %rd115, [%SP+1400];
ld.u64 %rd116, [%SP+1408];
ld.u64 %rd117, [%SP+1416];
ld.u64 %rd118, [%SP+1424];
ld.u64 %rd119, [%SP+1432];
$L__tmp581:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 14
st.u64 [%SP+1864], %rd119;
st.u64 [%SP+1856], %rd118;
st.u64 [%SP+1848], %rd117;
st.u64 [%SP+1840], %rd116;
st.u64 [%SP+1832], %rd115;
st.u64 [%SP+1824], %rd114;
ld.u64 %rd120, [%SP+1832];
st.u64 [%SP+1696], %rd120;
add.u64 %rd121, %SP, 1696;
mov.b64 %rd122, %rd121;
st.u64 [%SP+0], %rd122;
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 14
bra.uni $L__tmp582;
$L__tmp582:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
ld.u64 %rd123, [%SP+0];
setp.ne.s64 %p9, %rd123, 0;
not.pred %p10, %p9;
not.pred %p11, %p10;
@%p11 bra $L__BB6_8;
bra.uni $L__BB6_7;
$L__BB6_7:
mov.u32 %r10, 0;
mov.b32 %r11, %r10;
bra.uni $L__BB6_8;
$L__BB6_8:
ld.u64 %rd124, [%SP+0];
ld.u64 %rd125, [%rd124];
mov.b64 %rd126, %rd125;
$L__tmp583:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 14
ld.f64 %fd1, [%rd126];
add.u64 %rd127, %SP, 1600;
mov.b64 %rd128, %rd127;
st.u64 [%SP+40], %rd128;
mov.b64 %rd3, %rd1;
$L__tmp584:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 23
bra.uni $L__tmp585;
$L__tmp585:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd129, [%SP+40];
mov.b64 %rd130, %rd129;
st.u64 [%SP+32], %rd130;
mov.b64 %rd131, %rd3;
$L__tmp586:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp587;
$L__tmp587:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd132, [%SP+32];
setp.ne.s64 %p12, %rd132, 0;
not.pred %p13, %p12;
not.pred %p14, %p13;
@%p14 bra $L__BB6_10;
bra.uni $L__BB6_9;
$L__BB6_9:
mov.u32 %r12, 0;
mov.b32 %r13, %r12;
bra.uni $L__BB6_10;
$L__tmp588:
$L__BB6_10:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd133, [%SP+40];
ld.u64 %rd134, [%rd133+24];
ld.u64 %rd135, [%SP+40];
ld.u64 %rd136, [%rd135+32];
ld.u64 %rd137, [%SP+40];
ld.u64 %rd138, [%rd137+40];
ld.u64 %rd139, [%SP+40];
ld.u64 %rd140, [%rd139+48];
ld.u64 %rd141, [%SP+40];
ld.u64 %rd142, [%rd141+56];
ld.u64 %rd143, [%SP+40];
ld.u64 %rd144, [%rd143+64];
add.u64 %rd145, %SP, 48;
mov.b64 %rd146, %rd145;
st.u64 [%SP+16], %rd146;
mov.b64 %rd147, %rd3;
$L__tmp589:
.loc 11 0 6996
mov.b64 %rd148, %rd134;
$L__tmp590:
mov.b64 %rd149, %rd136;
$L__tmp591:
mov.b64 %rd150, %rd138;
$L__tmp592:
mov.b64 %rd151, %rd140;
$L__tmp593:
mov.b64 %rd152, %rd142;
$L__tmp594:
mov.b64 %rd153, %rd144;
st.u64 [%SP+24], %rd153;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp595;
$L__tmp595:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd154, [%SP+16];
$L__tmp596:
.loc 11 0 5739
mov.b64 %rd155, %rd147;
$L__tmp597:
mov.b64 %rd156, %rd148;
$L__tmp598:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp599;
$L__tmp599:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd157, %rd155, 3;
add.s64 %rd158, %rd156, %rd157;
st.u64 [%rd154], %rd158;
$L__tmp600:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd159, [%SP+16];
mov.b64 %rd160, %rd147;
$L__tmp601:
.loc 11 0 5752
mov.b64 %rd161, %rd149;
$L__tmp602:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp603;
$L__tmp603:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd162, %rd160, 3;
add.s64 %rd163, %rd161, %rd162;
st.u64 [%rd159+8], %rd163;
$L__tmp604:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd164, [%SP+16];
mov.b64 %rd165, %rd147;
$L__tmp605:
.loc 11 0 5765
mov.b64 %rd166, %rd150;
$L__tmp606:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp607;
$L__tmp607:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd167, %rd165, 3;
add.s64 %rd168, %rd166, %rd167;
st.u64 [%rd164+16], %rd168;
$L__tmp608:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd169, [%SP+16];
mov.b64 %rd170, %rd147;
$L__tmp609:
.loc 11 0 5778
mov.b64 %rd171, %rd151;
$L__tmp610:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp611;
$L__tmp611:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd172, %rd170, 1;
add.s64 %rd173, %rd171, %rd172;
st.u64 [%rd169+24], %rd173;
$L__tmp612:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd174, [%SP+16];
mov.b64 %rd175, %rd147;
$L__tmp613:
.loc 11 0 5801
mov.b64 %rd176, %rd152;
$L__tmp614:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp615;
$L__tmp615:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd177, %rd175, 2;
add.s64 %rd178, %rd176, %rd177;
st.u64 [%rd174+32], %rd178;
$L__tmp616:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd179, [%SP+16];
ld.u64 %rd180, [%SP+24];
mov.b64 %rd181, %rd147;
$L__tmp617:
.loc 11 0 5822
mov.b64 %rd182, %rd180;
st.u64 [%SP+8], %rd182;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp618;
$L__tmp618:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd183, [%SP+8];
shl.b64 %rd184, %rd181, 3;
add.s64 %rd185, %rd183, %rd184;
st.u64 [%rd179+40], %rd185;
$L__tmp619:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd186, [%SP+40];
setp.ne.s64 %p15, %rd186, 0;
not.pred %p16, %p15;
not.pred %p17, %p16;
@%p17 bra $L__BB6_12;
bra.uni $L__BB6_11;
$L__BB6_11:
mov.u32 %r14, 0;
mov.b32 %r15, %r14;
bra.uni $L__BB6_12;
$L__BB6_12:
ld.u64 %rd187, [%SP+48];
ld.u64 %rd188, [%SP+56];
ld.u64 %rd189, [%SP+64];
ld.u64 %rd190, [%SP+72];
ld.u64 %rd191, [%SP+80];
ld.u64 %rd192, [%SP+88];
$L__tmp620:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 23
st.u64 [%SP+1912], %rd192;
st.u64 [%SP+1904], %rd191;
st.u64 [%SP+1896], %rd190;
st.u64 [%SP+1888], %rd189;
st.u64 [%SP+1880], %rd188;
st.u64 [%SP+1872], %rd187;
ld.u64 %rd193, [%SP+1888];
st.u64 [%SP+1704], %rd193;
add.u64 %rd194, %SP, 1704;
mov.b64 %rd195, %rd194;
st.u64 [%SP+96], %rd195;
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 23
bra.uni $L__tmp621;
$L__tmp621:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
ld.u64 %rd196, [%SP+96];
setp.ne.s64 %p18, %rd196, 0;
not.pred %p19, %p18;
not.pred %p20, %p19;
@%p20 bra $L__BB6_14;
bra.uni $L__BB6_13;
$L__BB6_13:
mov.u32 %r16, 0;
mov.b32 %r17, %r16;
bra.uni $L__BB6_14;
$L__BB6_14:
ld.u64 %rd197, [%SP+96];
ld.u64 %rd198, [%rd197];
mov.b64 %rd199, %rd198;
$L__tmp622:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 23
ld.f64 %fd10, [%rd199];
mul.f64 %fd2, %fd1, %fd10;
add.u64 %rd200, %SP, 1520;
mov.b64 %rd201, %rd200;
st.u64 [%SP+136], %rd201;
mov.b64 %rd4, %rd1;
$L__tmp623:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 32
bra.uni $L__tmp624;
$L__tmp624:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd202, [%SP+136];
mov.b64 %rd203, %rd202;
st.u64 [%SP+128], %rd203;
mov.b64 %rd204, %rd4;
$L__tmp625:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp626;
$L__tmp626:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd205, [%SP+128];
setp.ne.s64 %p21, %rd205, 0;
not.pred %p22, %p21;
not.pred %p23, %p22;
@%p23 bra $L__BB6_16;
bra.uni $L__BB6_15;
$L__BB6_15:
mov.u32 %r18, 0;
mov.b32 %r19, %r18;
bra.uni $L__BB6_16;
$L__tmp627:
$L__BB6_16:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd206, [%SP+136];
ld.u64 %rd207, [%rd206+24];
ld.u64 %rd208, [%SP+136];
ld.u64 %rd209, [%rd208+32];
ld.u64 %rd210, [%SP+136];
ld.u64 %rd211, [%rd210+40];
ld.u64 %rd212, [%SP+136];
ld.u64 %rd213, [%rd212+48];
ld.u64 %rd214, [%SP+136];
ld.u64 %rd215, [%rd214+56];
ld.u64 %rd216, [%SP+136];
ld.u64 %rd217, [%rd216+64];
add.u64 %rd218, %SP, 144;
mov.b64 %rd219, %rd218;
st.u64 [%SP+112], %rd219;
mov.b64 %rd220, %rd4;
$L__tmp628:
.loc 11 0 6996
mov.b64 %rd221, %rd207;
$L__tmp629:
mov.b64 %rd222, %rd209;
$L__tmp630:
mov.b64 %rd223, %rd211;
$L__tmp631:
mov.b64 %rd224, %rd213;
$L__tmp632:
mov.b64 %rd225, %rd215;
$L__tmp633:
mov.b64 %rd226, %rd217;
st.u64 [%SP+120], %rd226;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp634;
$L__tmp634:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd227, [%SP+112];
$L__tmp635:
.loc 11 0 5739
mov.b64 %rd228, %rd220;
$L__tmp636:
mov.b64 %rd229, %rd221;
$L__tmp637:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp638;
$L__tmp638:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd230, %rd228, 3;
add.s64 %rd231, %rd229, %rd230;
st.u64 [%rd227], %rd231;
$L__tmp639:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd232, [%SP+112];
mov.b64 %rd233, %rd220;
$L__tmp640:
.loc 11 0 5752
mov.b64 %rd234, %rd222;
$L__tmp641:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp642;
$L__tmp642:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd235, %rd233, 3;
add.s64 %rd236, %rd234, %rd235;
st.u64 [%rd232+8], %rd236;
$L__tmp643:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd237, [%SP+112];
mov.b64 %rd238, %rd220;
$L__tmp644:
.loc 11 0 5765
mov.b64 %rd239, %rd223;
$L__tmp645:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp646;
$L__tmp646:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd240, %rd238, 3;
add.s64 %rd241, %rd239, %rd240;
st.u64 [%rd237+16], %rd241;
$L__tmp647:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd242, [%SP+112];
mov.b64 %rd243, %rd220;
$L__tmp648:
.loc 11 0 5778
mov.b64 %rd244, %rd224;
$L__tmp649:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp650;
$L__tmp650:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd245, %rd243, 1;
add.s64 %rd246, %rd244, %rd245;
st.u64 [%rd242+24], %rd246;
$L__tmp651:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd247, [%SP+112];
mov.b64 %rd248, %rd220;
$L__tmp652:
.loc 11 0 5801
mov.b64 %rd249, %rd225;
$L__tmp653:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp654;
$L__tmp654:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd250, %rd248, 2;
add.s64 %rd251, %rd249, %rd250;
st.u64 [%rd247+32], %rd251;
$L__tmp655:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd252, [%SP+112];
ld.u64 %rd253, [%SP+120];
mov.b64 %rd254, %rd220;
$L__tmp656:
.loc 11 0 5822
mov.b64 %rd255, %rd253;
st.u64 [%SP+104], %rd255;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp657;
$L__tmp657:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd256, [%SP+104];
shl.b64 %rd257, %rd254, 3;
add.s64 %rd258, %rd256, %rd257;
st.u64 [%rd252+40], %rd258;
$L__tmp658:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd259, [%SP+136];
setp.ne.s64 %p24, %rd259, 0;
not.pred %p25, %p24;
not.pred %p26, %p25;
@%p26 bra $L__BB6_18;
bra.uni $L__BB6_17;
$L__BB6_17:
mov.u32 %r20, 0;
mov.b32 %r21, %r20;
bra.uni $L__BB6_18;
$L__BB6_18:
ld.u64 %rd260, [%SP+144];
ld.u64 %rd261, [%SP+152];
ld.u64 %rd262, [%SP+160];
ld.u64 %rd263, [%SP+168];
ld.u64 %rd264, [%SP+176];
ld.u64 %rd265, [%SP+184];
$L__tmp659:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 32
st.u64 [%SP+1960], %rd265;
st.u64 [%SP+1952], %rd264;
st.u64 [%SP+1944], %rd263;
st.u64 [%SP+1936], %rd262;
st.u64 [%SP+1928], %rd261;
st.u64 [%SP+1920], %rd260;
ld.u64 %rd266, [%SP+1936];
st.u64 [%SP+1712], %rd266;
add.u64 %rd267, %SP, 1712;
mov.b64 %rd268, %rd267;
st.u64 [%SP+192], %rd268;
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 32
bra.uni $L__tmp660;
$L__tmp660:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
ld.u64 %rd269, [%SP+192];
setp.ne.s64 %p27, %rd269, 0;
not.pred %p28, %p27;
not.pred %p29, %p28;
@%p29 bra $L__BB6_20;
bra.uni $L__BB6_19;
$L__BB6_19:
mov.u32 %r22, 0;
mov.b32 %r23, %r22;
bra.uni $L__BB6_20;
$L__BB6_20:
ld.u64 %rd270, [%SP+192];
ld.u64 %rd271, [%rd270];
mov.b64 %rd272, %rd271;
$L__tmp661:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 32
ld.f64 %fd3, [%rd272];
add.u64 %rd273, %SP, 1600;
mov.b64 %rd274, %rd273;
st.u64 [%SP+232], %rd274;
mov.b64 %rd5, %rd1;
$L__tmp662:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 41
bra.uni $L__tmp663;
$L__tmp663:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd275, [%SP+232];
mov.b64 %rd276, %rd275;
st.u64 [%SP+224], %rd276;
mov.b64 %rd277, %rd5;
$L__tmp664:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp665;
$L__tmp665:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd278, [%SP+224];
setp.ne.s64 %p30, %rd278, 0;
not.pred %p31, %p30;
not.pred %p32, %p31;
@%p32 bra $L__BB6_22;
bra.uni $L__BB6_21;
$L__BB6_21:
mov.u32 %r24, 0;
mov.b32 %r25, %r24;
bra.uni $L__BB6_22;
$L__tmp666:
$L__BB6_22:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd279, [%SP+232];
ld.u64 %rd280, [%rd279+24];
ld.u64 %rd281, [%SP+232];
ld.u64 %rd282, [%rd281+32];
ld.u64 %rd283, [%SP+232];
ld.u64 %rd284, [%rd283+40];
ld.u64 %rd285, [%SP+232];
ld.u64 %rd286, [%rd285+48];
ld.u64 %rd287, [%SP+232];
ld.u64 %rd288, [%rd287+56];
ld.u64 %rd289, [%SP+232];
ld.u64 %rd290, [%rd289+64];
add.u64 %rd291, %SP, 240;
mov.b64 %rd292, %rd291;
st.u64 [%SP+208], %rd292;
mov.b64 %rd293, %rd5;
$L__tmp667:
.loc 11 0 6996
mov.b64 %rd294, %rd280;
$L__tmp668:
mov.b64 %rd295, %rd282;
$L__tmp669:
mov.b64 %rd296, %rd284;
$L__tmp670:
mov.b64 %rd297, %rd286;
$L__tmp671:
mov.b64 %rd298, %rd288;
$L__tmp672:
mov.b64 %rd299, %rd290;
st.u64 [%SP+216], %rd299;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp673;
$L__tmp673:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd300, [%SP+208];
$L__tmp674:
.loc 11 0 5739
mov.b64 %rd301, %rd293;
$L__tmp675:
mov.b64 %rd302, %rd294;
$L__tmp676:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp677;
$L__tmp677:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd303, %rd301, 3;
add.s64 %rd304, %rd302, %rd303;
st.u64 [%rd300], %rd304;
$L__tmp678:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd305, [%SP+208];
mov.b64 %rd306, %rd293;
$L__tmp679:
.loc 11 0 5752
mov.b64 %rd307, %rd295;
$L__tmp680:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp681;
$L__tmp681:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd308, %rd306, 3;
add.s64 %rd309, %rd307, %rd308;
st.u64 [%rd305+8], %rd309;
$L__tmp682:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd310, [%SP+208];
mov.b64 %rd311, %rd293;
$L__tmp683:
.loc 11 0 5765
mov.b64 %rd312, %rd296;
$L__tmp684:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp685;
$L__tmp685:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd313, %rd311, 3;
add.s64 %rd314, %rd312, %rd313;
st.u64 [%rd310+16], %rd314;
$L__tmp686:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd315, [%SP+208];
mov.b64 %rd316, %rd293;
$L__tmp687:
.loc 11 0 5778
mov.b64 %rd317, %rd297;
$L__tmp688:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp689;
$L__tmp689:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd318, %rd316, 1;
add.s64 %rd319, %rd317, %rd318;
st.u64 [%rd315+24], %rd319;
$L__tmp690:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd320, [%SP+208];
mov.b64 %rd321, %rd293;
$L__tmp691:
.loc 11 0 5801
mov.b64 %rd322, %rd298;
$L__tmp692:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp693;
$L__tmp693:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd323, %rd321, 2;
add.s64 %rd324, %rd322, %rd323;
st.u64 [%rd320+32], %rd324;
$L__tmp694:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd325, [%SP+208];
ld.u64 %rd326, [%SP+216];
mov.b64 %rd327, %rd293;
$L__tmp695:
.loc 11 0 5822
mov.b64 %rd328, %rd326;
st.u64 [%SP+200], %rd328;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp696;
$L__tmp696:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd329, [%SP+200];
shl.b64 %rd330, %rd327, 3;
add.s64 %rd331, %rd329, %rd330;
st.u64 [%rd325+40], %rd331;
$L__tmp697:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd332, [%SP+232];
setp.ne.s64 %p33, %rd332, 0;
not.pred %p34, %p33;
not.pred %p35, %p34;
@%p35 bra $L__BB6_24;
bra.uni $L__BB6_23;
$L__BB6_23:
mov.u32 %r26, 0;
mov.b32 %r27, %r26;
bra.uni $L__BB6_24;
$L__BB6_24:
ld.u64 %rd333, [%SP+240];
ld.u64 %rd334, [%SP+248];
ld.u64 %rd335, [%SP+256];
ld.u64 %rd336, [%SP+264];
ld.u64 %rd337, [%SP+272];
ld.u64 %rd338, [%SP+280];
$L__tmp698:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 41
st.u64 [%SP+2008], %rd338;
st.u64 [%SP+2000], %rd337;
st.u64 [%SP+1992], %rd336;
st.u64 [%SP+1984], %rd335;
st.u64 [%SP+1976], %rd334;
st.u64 [%SP+1968], %rd333;
ld.u64 %rd339, [%SP+1976];
st.u64 [%SP+1720], %rd339;
add.u64 %rd340, %SP, 1720;
mov.b64 %rd341, %rd340;
st.u64 [%SP+288], %rd341;
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 41
bra.uni $L__tmp699;
$L__tmp699:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
ld.u64 %rd342, [%SP+288];
setp.ne.s64 %p36, %rd342, 0;
not.pred %p37, %p36;
not.pred %p38, %p37;
@%p38 bra $L__BB6_26;
bra.uni $L__BB6_25;
$L__BB6_25:
mov.u32 %r28, 0;
mov.b32 %r29, %r28;
bra.uni $L__BB6_26;
$L__BB6_26:
ld.u64 %rd343, [%SP+288];
ld.u64 %rd344, [%rd343];
mov.b64 %rd345, %rd344;
$L__tmp700:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 41
ld.f64 %fd11, [%rd345];
mul.f64 %fd12, %fd3, %fd11;
sub.f64 %fd13, %fd2, %fd12;
st.f64 [%SP+1688], %fd13;
add.u64 %rd346, %SP, 1688;
mov.b64 %rd6, %rd346;
add.u64 %rd347, %SP, 1440;
mov.b64 %rd348, %rd347;
st.u64 [%SP+328], %rd348;
mov.b64 %rd7, %rd1;
$L__tmp701:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 5
bra.uni $L__tmp702;
$L__tmp702:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6763
ld.u64 %rd349, [%SP+328];
mov.b64 %rd350, %rd349;
st.u64 [%SP+320], %rd350;
mov.b64 %rd351, %rd7;
$L__tmp703:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6763
bra.uni $L__tmp704;
$L__tmp704:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd352, [%SP+320];
setp.ne.s64 %p39, %rd352, 0;
not.pred %p40, %p39;
not.pred %p41, %p40;
@%p41 bra $L__BB6_28;
bra.uni $L__BB6_27;
$L__BB6_27:
mov.u32 %r30, 0;
mov.b32 %r31, %r30;
bra.uni $L__BB6_28;
$L__tmp705:
$L__BB6_28:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6782
ld.u64 %rd353, [%SP+328];
ld.u64 %rd354, [%rd353+24];
ld.u64 %rd355, [%SP+328];
ld.u64 %rd356, [%rd355+32];
ld.u64 %rd357, [%SP+328];
ld.u64 %rd358, [%rd357+40];
ld.u64 %rd359, [%SP+328];
ld.u64 %rd360, [%rd359+48];
ld.u64 %rd361, [%SP+328];
ld.u64 %rd362, [%rd361+56];
ld.u64 %rd363, [%SP+328];
ld.u64 %rd364, [%rd363+64];
add.u64 %rd365, %SP, 336;
mov.b64 %rd366, %rd365;
st.u64 [%SP+304], %rd366;
mov.b64 %rd367, %rd7;
$L__tmp706:
.loc 11 0 6782
mov.b64 %rd368, %rd354;
$L__tmp707:
mov.b64 %rd369, %rd356;
$L__tmp708:
mov.b64 %rd370, %rd358;
$L__tmp709:
mov.b64 %rd371, %rd360;
$L__tmp710:
mov.b64 %rd372, %rd362;
$L__tmp711:
mov.b64 %rd373, %rd364;
st.u64 [%SP+312], %rd373;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6782
bra.uni $L__tmp712;
$L__tmp712:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd374, [%SP+304];
$L__tmp713:
.loc 11 0 5739
mov.b64 %rd375, %rd367;
$L__tmp714:
mov.b64 %rd376, %rd368;
$L__tmp715:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp716;
$L__tmp716:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd377, %rd375, 3;
add.s64 %rd378, %rd376, %rd377;
st.u64 [%rd374], %rd378;
$L__tmp717:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd379, [%SP+304];
mov.b64 %rd380, %rd367;
$L__tmp718:
.loc 11 0 5752
mov.b64 %rd381, %rd369;
$L__tmp719:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp720;
$L__tmp720:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd382, %rd380, 3;
add.s64 %rd383, %rd381, %rd382;
st.u64 [%rd379+8], %rd383;
$L__tmp721:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd384, [%SP+304];
mov.b64 %rd385, %rd367;
$L__tmp722:
.loc 11 0 5765
mov.b64 %rd386, %rd370;
$L__tmp723:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp724;
$L__tmp724:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd387, %rd385, 3;
add.s64 %rd388, %rd386, %rd387;
st.u64 [%rd384+16], %rd388;
$L__tmp725:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd389, [%SP+304];
mov.b64 %rd390, %rd367;
$L__tmp726:
.loc 11 0 5778
mov.b64 %rd391, %rd371;
$L__tmp727:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp728;
$L__tmp728:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd392, %rd390, 1;
add.s64 %rd393, %rd391, %rd392;
st.u64 [%rd389+24], %rd393;
$L__tmp729:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd394, [%SP+304];
mov.b64 %rd395, %rd367;
$L__tmp730:
.loc 11 0 5801
mov.b64 %rd396, %rd372;
$L__tmp731:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp732;
$L__tmp732:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd397, %rd395, 2;
add.s64 %rd398, %rd396, %rd397;
st.u64 [%rd394+32], %rd398;
$L__tmp733:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd399, [%SP+304];
ld.u64 %rd400, [%SP+312];
mov.b64 %rd401, %rd367;
$L__tmp734:
.loc 11 0 5822
mov.b64 %rd402, %rd400;
st.u64 [%SP+296], %rd402;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp735;
$L__tmp735:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd403, [%SP+296];
shl.b64 %rd404, %rd401, 3;
add.s64 %rd405, %rd403, %rd404;
st.u64 [%rd399+40], %rd405;
$L__tmp736:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6782
ld.u64 %rd406, [%SP+328];
setp.ne.s64 %p42, %rd406, 0;
not.pred %p43, %p42;
not.pred %p44, %p43;
@%p44 bra $L__BB6_30;
bra.uni $L__BB6_29;
$L__BB6_29:
mov.u32 %r32, 0;
mov.b32 %r33, %r32;
bra.uni $L__BB6_30;
$L__BB6_30:
ld.u64 %rd407, [%SP+336];
ld.u64 %rd408, [%SP+344];
ld.u64 %rd409, [%SP+352];
ld.u64 %rd410, [%SP+360];
ld.u64 %rd411, [%SP+368];
ld.u64 %rd412, [%SP+376];
$L__tmp737:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 5
st.u64 [%SP+2056], %rd412;
st.u64 [%SP+2048], %rd411;
st.u64 [%SP+2040], %rd410;
st.u64 [%SP+2032], %rd409;
st.u64 [%SP+2024], %rd408;
st.u64 [%SP+2016], %rd407;
ld.u64 %rd413, [%SP+2016];
st.u64 [%SP+1680], %rd413;
add.u64 %rd414, %SP, 1680;
mov.b64 %rd415, %rd414;
st.u64 [%SP+384], %rd415;
mov.b64 %rd8, %rd6;
$L__tmp738:
//test_v7_cuda.cu:70 r[i].x = a[i].y * b[i].z - a[i].z * b[i].y;
.loc 18 70 5
bra.uni $L__tmp739;
$L__tmp739:
//soa_v7.h:35 SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return val_ = v; }
.loc 12 35 120
ld.u64 %rd416, [%SP+384];
setp.ne.s64 %p45, %rd416, 0;
not.pred %p46, %p45;
not.pred %p47, %p46;
@%p47 bra $L__BB6_32;
bra.uni $L__BB6_31;
$L__BB6_31:
mov.u32 %r34, 0;
mov.b32 %r35, %r34;
bra.uni $L__BB6_32;
$L__BB6_32:
ld.f64 %fd14, [%rd8];
ld.u64 %rd417, [%SP+384];
ld.u64 %rd418, [%rd417];
mov.b64 %rd419, %rd418;
st.f64 [%rd419], %fd14;
mov.b64 %rd420, %rd419;
add.u64 %rd421, %SP, 1520;
mov.b64 %rd422, %rd421;
st.u64 [%SP+424], %rd422;
mov.b64 %rd9, %rd1;
$L__tmp740:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 14
bra.uni $L__tmp741;
$L__tmp741:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd423, [%SP+424];
mov.b64 %rd424, %rd423;
st.u64 [%SP+416], %rd424;
mov.b64 %rd425, %rd9;
$L__tmp742:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp743;
$L__tmp743:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd426, [%SP+416];
setp.ne.s64 %p48, %rd426, 0;
not.pred %p49, %p48;
not.pred %p50, %p49;
@%p50 bra $L__BB6_34;
bra.uni $L__BB6_33;
$L__BB6_33:
mov.u32 %r36, 0;
mov.b32 %r37, %r36;
bra.uni $L__BB6_34;
$L__tmp744:
$L__BB6_34:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd427, [%SP+424];
ld.u64 %rd428, [%rd427+24];
ld.u64 %rd429, [%SP+424];
ld.u64 %rd430, [%rd429+32];
ld.u64 %rd431, [%SP+424];
ld.u64 %rd432, [%rd431+40];
ld.u64 %rd433, [%SP+424];
ld.u64 %rd434, [%rd433+48];
ld.u64 %rd435, [%SP+424];
ld.u64 %rd436, [%rd435+56];
ld.u64 %rd437, [%SP+424];
ld.u64 %rd438, [%rd437+64];
add.u64 %rd439, %SP, 432;
mov.b64 %rd440, %rd439;
st.u64 [%SP+400], %rd440;
mov.b64 %rd441, %rd9;
$L__tmp745:
.loc 11 0 6996
mov.b64 %rd442, %rd428;
$L__tmp746:
mov.b64 %rd443, %rd430;
$L__tmp747:
mov.b64 %rd444, %rd432;
$L__tmp748:
mov.b64 %rd445, %rd434;
$L__tmp749:
mov.b64 %rd446, %rd436;
$L__tmp750:
mov.b64 %rd447, %rd438;
st.u64 [%SP+408], %rd447;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp751;
$L__tmp751:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd448, [%SP+400];
$L__tmp752:
.loc 11 0 5739
mov.b64 %rd449, %rd441;
$L__tmp753:
mov.b64 %rd450, %rd442;
$L__tmp754:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp755;
$L__tmp755:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd451, %rd449, 3;
add.s64 %rd452, %rd450, %rd451;
st.u64 [%rd448], %rd452;
$L__tmp756:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd453, [%SP+400];
mov.b64 %rd454, %rd441;
$L__tmp757:
.loc 11 0 5752
mov.b64 %rd455, %rd443;
$L__tmp758:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp759;
$L__tmp759:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd456, %rd454, 3;
add.s64 %rd457, %rd455, %rd456;
st.u64 [%rd453+8], %rd457;
$L__tmp760:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd458, [%SP+400];
mov.b64 %rd459, %rd441;
$L__tmp761:
.loc 11 0 5765
mov.b64 %rd460, %rd444;
$L__tmp762:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp763;
$L__tmp763:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd461, %rd459, 3;
add.s64 %rd462, %rd460, %rd461;
st.u64 [%rd458+16], %rd462;
$L__tmp764:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd463, [%SP+400];
mov.b64 %rd464, %rd441;
$L__tmp765:
.loc 11 0 5778
mov.b64 %rd465, %rd445;
$L__tmp766:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp767;
$L__tmp767:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd466, %rd464, 1;
add.s64 %rd467, %rd465, %rd466;
st.u64 [%rd463+24], %rd467;
$L__tmp768:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd468, [%SP+400];
mov.b64 %rd469, %rd441;
$L__tmp769:
.loc 11 0 5801
mov.b64 %rd470, %rd446;
$L__tmp770:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp771;
$L__tmp771:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd471, %rd469, 2;
add.s64 %rd472, %rd470, %rd471;
st.u64 [%rd468+32], %rd472;
$L__tmp772:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd473, [%SP+400];
ld.u64 %rd474, [%SP+408];
mov.b64 %rd475, %rd441;
$L__tmp773:
.loc 11 0 5822
mov.b64 %rd476, %rd474;
st.u64 [%SP+392], %rd476;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp774;
$L__tmp774:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd477, [%SP+392];
shl.b64 %rd478, %rd475, 3;
add.s64 %rd479, %rd477, %rd478;
st.u64 [%rd473+40], %rd479;
$L__tmp775:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd480, [%SP+424];
setp.ne.s64 %p51, %rd480, 0;
not.pred %p52, %p51;
not.pred %p53, %p52;
@%p53 bra $L__BB6_36;
bra.uni $L__BB6_35;
$L__BB6_35:
mov.u32 %r38, 0;
mov.b32 %r39, %r38;
bra.uni $L__BB6_36;
$L__BB6_36:
ld.u64 %rd481, [%SP+432];
ld.u64 %rd482, [%SP+440];
ld.u64 %rd483, [%SP+448];
ld.u64 %rd484, [%SP+456];
ld.u64 %rd485, [%SP+464];
ld.u64 %rd486, [%SP+472];
$L__tmp776:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 14
st.u64 [%SP+2104], %rd486;
st.u64 [%SP+2096], %rd485;
st.u64 [%SP+2088], %rd484;
st.u64 [%SP+2080], %rd483;
st.u64 [%SP+2072], %rd482;
st.u64 [%SP+2064], %rd481;
ld.u64 %rd487, [%SP+2080];
st.u64 [%SP+1744], %rd487;
add.u64 %rd488, %SP, 1744;
mov.b64 %rd489, %rd488;
st.u64 [%SP+480], %rd489;
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 14
bra.uni $L__tmp777;
$L__tmp777:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
ld.u64 %rd490, [%SP+480];
setp.ne.s64 %p54, %rd490, 0;
not.pred %p55, %p54;
not.pred %p56, %p55;
@%p56 bra $L__BB6_38;
bra.uni $L__BB6_37;
$L__BB6_37:
mov.u32 %r40, 0;
mov.b32 %r41, %r40;
bra.uni $L__BB6_38;
$L__BB6_38:
ld.u64 %rd491, [%SP+480];
ld.u64 %rd492, [%rd491];
mov.b64 %rd493, %rd492;
$L__tmp778:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 14
ld.f64 %fd4, [%rd493];
add.u64 %rd494, %SP, 1600;
mov.b64 %rd495, %rd494;
st.u64 [%SP+520], %rd495;
mov.b64 %rd10, %rd1;
$L__tmp779:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 23
bra.uni $L__tmp780;
$L__tmp780:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd496, [%SP+520];
mov.b64 %rd497, %rd496;
st.u64 [%SP+512], %rd497;
mov.b64 %rd498, %rd10;
$L__tmp781:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp782;
$L__tmp782:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd499, [%SP+512];
setp.ne.s64 %p57, %rd499, 0;
not.pred %p58, %p57;
not.pred %p59, %p58;
@%p59 bra $L__BB6_40;
bra.uni $L__BB6_39;
$L__BB6_39:
mov.u32 %r42, 0;
mov.b32 %r43, %r42;
bra.uni $L__BB6_40;
$L__tmp783:
$L__BB6_40:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd500, [%SP+520];
ld.u64 %rd501, [%rd500+24];
ld.u64 %rd502, [%SP+520];
ld.u64 %rd503, [%rd502+32];
ld.u64 %rd504, [%SP+520];
ld.u64 %rd505, [%rd504+40];
ld.u64 %rd506, [%SP+520];
ld.u64 %rd507, [%rd506+48];
ld.u64 %rd508, [%SP+520];
ld.u64 %rd509, [%rd508+56];
ld.u64 %rd510, [%SP+520];
ld.u64 %rd511, [%rd510+64];
add.u64 %rd512, %SP, 528;
mov.b64 %rd513, %rd512;
st.u64 [%SP+496], %rd513;
mov.b64 %rd514, %rd10;
$L__tmp784:
.loc 11 0 6996
mov.b64 %rd515, %rd501;
$L__tmp785:
mov.b64 %rd516, %rd503;
$L__tmp786:
mov.b64 %rd517, %rd505;
$L__tmp787:
mov.b64 %rd518, %rd507;
$L__tmp788:
mov.b64 %rd519, %rd509;
$L__tmp789:
mov.b64 %rd520, %rd511;
st.u64 [%SP+504], %rd520;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp790;
$L__tmp790:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd521, [%SP+496];
$L__tmp791:
.loc 11 0 5739
mov.b64 %rd522, %rd514;
$L__tmp792:
mov.b64 %rd523, %rd515;
$L__tmp793:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp794;
$L__tmp794:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd524, %rd522, 3;
add.s64 %rd525, %rd523, %rd524;
st.u64 [%rd521], %rd525;
$L__tmp795:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd526, [%SP+496];
mov.b64 %rd527, %rd514;
$L__tmp796:
.loc 11 0 5752
mov.b64 %rd528, %rd516;
$L__tmp797:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp798;
$L__tmp798:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd529, %rd527, 3;
add.s64 %rd530, %rd528, %rd529;
st.u64 [%rd526+8], %rd530;
$L__tmp799:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd531, [%SP+496];
mov.b64 %rd532, %rd514;
$L__tmp800:
.loc 11 0 5765
mov.b64 %rd533, %rd517;
$L__tmp801:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp802;
$L__tmp802:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd534, %rd532, 3;
add.s64 %rd535, %rd533, %rd534;
st.u64 [%rd531+16], %rd535;
$L__tmp803:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd536, [%SP+496];
mov.b64 %rd537, %rd514;
$L__tmp804:
.loc 11 0 5778
mov.b64 %rd538, %rd518;
$L__tmp805:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp806;
$L__tmp806:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd539, %rd537, 1;
add.s64 %rd540, %rd538, %rd539;
st.u64 [%rd536+24], %rd540;
$L__tmp807:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd541, [%SP+496];
mov.b64 %rd542, %rd514;
$L__tmp808:
.loc 11 0 5801
mov.b64 %rd543, %rd519;
$L__tmp809:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp810;
$L__tmp810:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd544, %rd542, 2;
add.s64 %rd545, %rd543, %rd544;
st.u64 [%rd541+32], %rd545;
$L__tmp811:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd546, [%SP+496];
ld.u64 %rd547, [%SP+504];
mov.b64 %rd548, %rd514;
$L__tmp812:
.loc 11 0 5822
mov.b64 %rd549, %rd547;
st.u64 [%SP+488], %rd549;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp813;
$L__tmp813:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd550, [%SP+488];
shl.b64 %rd551, %rd548, 3;
add.s64 %rd552, %rd550, %rd551;
st.u64 [%rd546+40], %rd552;
$L__tmp814:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd553, [%SP+520];
setp.ne.s64 %p60, %rd553, 0;
not.pred %p61, %p60;
not.pred %p62, %p61;
@%p62 bra $L__BB6_42;
bra.uni $L__BB6_41;
$L__BB6_41:
mov.u32 %r44, 0;
mov.b32 %r45, %r44;
bra.uni $L__BB6_42;
$L__BB6_42:
ld.u64 %rd554, [%SP+528];
ld.u64 %rd555, [%SP+536];
ld.u64 %rd556, [%SP+544];
ld.u64 %rd557, [%SP+552];
ld.u64 %rd558, [%SP+560];
ld.u64 %rd559, [%SP+568];
$L__tmp815:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 23
st.u64 [%SP+2152], %rd559;
st.u64 [%SP+2144], %rd558;
st.u64 [%SP+2136], %rd557;
st.u64 [%SP+2128], %rd556;
st.u64 [%SP+2120], %rd555;
st.u64 [%SP+2112], %rd554;
ld.u64 %rd560, [%SP+2112];
st.u64 [%SP+1752], %rd560;
add.u64 %rd561, %SP, 1752;
mov.b64 %rd562, %rd561;
st.u64 [%SP+576], %rd562;
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 23
bra.uni $L__tmp816;
$L__tmp816:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
ld.u64 %rd563, [%SP+576];
setp.ne.s64 %p63, %rd563, 0;
not.pred %p64, %p63;
not.pred %p65, %p64;
@%p65 bra $L__BB6_44;
bra.uni $L__BB6_43;
$L__BB6_43:
mov.u32 %r46, 0;
mov.b32 %r47, %r46;
bra.uni $L__BB6_44;
$L__BB6_44:
ld.u64 %rd564, [%SP+576];
ld.u64 %rd565, [%rd564];
mov.b64 %rd566, %rd565;
$L__tmp817:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 23
ld.f64 %fd15, [%rd566];
mul.f64 %fd5, %fd4, %fd15;
add.u64 %rd567, %SP, 1520;
mov.b64 %rd568, %rd567;
st.u64 [%SP+616], %rd568;
mov.b64 %rd11, %rd1;
$L__tmp818:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 32
bra.uni $L__tmp819;
$L__tmp819:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd569, [%SP+616];
mov.b64 %rd570, %rd569;
st.u64 [%SP+608], %rd570;
mov.b64 %rd571, %rd11;
$L__tmp820:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp821;
$L__tmp821:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd572, [%SP+608];
setp.ne.s64 %p66, %rd572, 0;
not.pred %p67, %p66;
not.pred %p68, %p67;
@%p68 bra $L__BB6_46;
bra.uni $L__BB6_45;
$L__BB6_45:
mov.u32 %r48, 0;
mov.b32 %r49, %r48;
bra.uni $L__BB6_46;
$L__tmp822:
$L__BB6_46:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd573, [%SP+616];
ld.u64 %rd574, [%rd573+24];
ld.u64 %rd575, [%SP+616];
ld.u64 %rd576, [%rd575+32];
ld.u64 %rd577, [%SP+616];
ld.u64 %rd578, [%rd577+40];
ld.u64 %rd579, [%SP+616];
ld.u64 %rd580, [%rd579+48];
ld.u64 %rd581, [%SP+616];
ld.u64 %rd582, [%rd581+56];
ld.u64 %rd583, [%SP+616];
ld.u64 %rd584, [%rd583+64];
add.u64 %rd585, %SP, 624;
mov.b64 %rd586, %rd585;
st.u64 [%SP+592], %rd586;
mov.b64 %rd587, %rd11;
$L__tmp823:
.loc 11 0 6996
mov.b64 %rd588, %rd574;
$L__tmp824:
mov.b64 %rd589, %rd576;
$L__tmp825:
mov.b64 %rd590, %rd578;
$L__tmp826:
mov.b64 %rd591, %rd580;
$L__tmp827:
mov.b64 %rd592, %rd582;
$L__tmp828:
mov.b64 %rd593, %rd584;
st.u64 [%SP+600], %rd593;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp829;
$L__tmp829:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd594, [%SP+592];
$L__tmp830:
.loc 11 0 5739
mov.b64 %rd595, %rd587;
$L__tmp831:
mov.b64 %rd596, %rd588;
$L__tmp832:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp833;
$L__tmp833:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd597, %rd595, 3;
add.s64 %rd598, %rd596, %rd597;
st.u64 [%rd594], %rd598;
$L__tmp834:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd599, [%SP+592];
mov.b64 %rd600, %rd587;
$L__tmp835:
.loc 11 0 5752
mov.b64 %rd601, %rd589;
$L__tmp836:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp837;
$L__tmp837:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd602, %rd600, 3;
add.s64 %rd603, %rd601, %rd602;
st.u64 [%rd599+8], %rd603;
$L__tmp838:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd604, [%SP+592];
mov.b64 %rd605, %rd587;
$L__tmp839:
.loc 11 0 5765
mov.b64 %rd606, %rd590;
$L__tmp840:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp841;
$L__tmp841:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd607, %rd605, 3;
add.s64 %rd608, %rd606, %rd607;
st.u64 [%rd604+16], %rd608;
$L__tmp842:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd609, [%SP+592];
mov.b64 %rd610, %rd587;
$L__tmp843:
.loc 11 0 5778
mov.b64 %rd611, %rd591;
$L__tmp844:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp845;
$L__tmp845:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd612, %rd610, 1;
add.s64 %rd613, %rd611, %rd612;
st.u64 [%rd609+24], %rd613;
$L__tmp846:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd614, [%SP+592];
mov.b64 %rd615, %rd587;
$L__tmp847:
.loc 11 0 5801
mov.b64 %rd616, %rd592;
$L__tmp848:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp849;
$L__tmp849:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd617, %rd615, 2;
add.s64 %rd618, %rd616, %rd617;
st.u64 [%rd614+32], %rd618;
$L__tmp850:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd619, [%SP+592];
ld.u64 %rd620, [%SP+600];
mov.b64 %rd621, %rd587;
$L__tmp851:
.loc 11 0 5822
mov.b64 %rd622, %rd620;
st.u64 [%SP+584], %rd622;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp852;
$L__tmp852:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd623, [%SP+584];
shl.b64 %rd624, %rd621, 3;
add.s64 %rd625, %rd623, %rd624;
st.u64 [%rd619+40], %rd625;
$L__tmp853:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd626, [%SP+616];
setp.ne.s64 %p69, %rd626, 0;
not.pred %p70, %p69;
not.pred %p71, %p70;
@%p71 bra $L__BB6_48;
bra.uni $L__BB6_47;
$L__BB6_47:
mov.u32 %r50, 0;
mov.b32 %r51, %r50;
bra.uni $L__BB6_48;
$L__BB6_48:
ld.u64 %rd627, [%SP+624];
ld.u64 %rd628, [%SP+632];
ld.u64 %rd629, [%SP+640];
ld.u64 %rd630, [%SP+648];
ld.u64 %rd631, [%SP+656];
ld.u64 %rd632, [%SP+664];
$L__tmp854:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 32
st.u64 [%SP+2200], %rd632;
st.u64 [%SP+2192], %rd631;
st.u64 [%SP+2184], %rd630;
st.u64 [%SP+2176], %rd629;
st.u64 [%SP+2168], %rd628;
st.u64 [%SP+2160], %rd627;
ld.u64 %rd633, [%SP+2160];
st.u64 [%SP+1760], %rd633;
add.u64 %rd634, %SP, 1760;
mov.b64 %rd635, %rd634;
st.u64 [%SP+672], %rd635;
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 32
bra.uni $L__tmp855;
$L__tmp855:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
ld.u64 %rd636, [%SP+672];
setp.ne.s64 %p72, %rd636, 0;
not.pred %p73, %p72;
not.pred %p74, %p73;
@%p74 bra $L__BB6_50;
bra.uni $L__BB6_49;
$L__BB6_49:
mov.u32 %r52, 0;
mov.b32 %r53, %r52;
bra.uni $L__BB6_50;
$L__BB6_50:
ld.u64 %rd637, [%SP+672];
ld.u64 %rd638, [%rd637];
mov.b64 %rd639, %rd638;
$L__tmp856:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 32
ld.f64 %fd6, [%rd639];
add.u64 %rd640, %SP, 1600;
mov.b64 %rd641, %rd640;
st.u64 [%SP+712], %rd641;
mov.b64 %rd12, %rd1;
$L__tmp857:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 41
bra.uni $L__tmp858;
$L__tmp858:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd642, [%SP+712];
mov.b64 %rd643, %rd642;
st.u64 [%SP+704], %rd643;
mov.b64 %rd644, %rd12;
$L__tmp859:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp860;
$L__tmp860:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd645, [%SP+704];
setp.ne.s64 %p75, %rd645, 0;
not.pred %p76, %p75;
not.pred %p77, %p76;
@%p77 bra $L__BB6_52;
bra.uni $L__BB6_51;
$L__BB6_51:
mov.u32 %r54, 0;
mov.b32 %r55, %r54;
bra.uni $L__BB6_52;
$L__tmp861:
$L__BB6_52:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd646, [%SP+712];
ld.u64 %rd647, [%rd646+24];
ld.u64 %rd648, [%SP+712];
ld.u64 %rd649, [%rd648+32];
ld.u64 %rd650, [%SP+712];
ld.u64 %rd651, [%rd650+40];
ld.u64 %rd652, [%SP+712];
ld.u64 %rd653, [%rd652+48];
ld.u64 %rd654, [%SP+712];
ld.u64 %rd655, [%rd654+56];
ld.u64 %rd656, [%SP+712];
ld.u64 %rd657, [%rd656+64];
add.u64 %rd658, %SP, 720;
mov.b64 %rd659, %rd658;
st.u64 [%SP+688], %rd659;
mov.b64 %rd660, %rd12;
$L__tmp862:
.loc 11 0 6996
mov.b64 %rd661, %rd647;
$L__tmp863:
mov.b64 %rd662, %rd649;
$L__tmp864:
mov.b64 %rd663, %rd651;
$L__tmp865:
mov.b64 %rd664, %rd653;
$L__tmp866:
mov.b64 %rd665, %rd655;
$L__tmp867:
mov.b64 %rd666, %rd657;
st.u64 [%SP+696], %rd666;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp868;
$L__tmp868:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd667, [%SP+688];
$L__tmp869:
.loc 11 0 5739
mov.b64 %rd668, %rd660;
$L__tmp870:
mov.b64 %rd669, %rd661;
$L__tmp871:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp872;
$L__tmp872:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd670, %rd668, 3;
add.s64 %rd671, %rd669, %rd670;
st.u64 [%rd667], %rd671;
$L__tmp873:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd672, [%SP+688];
mov.b64 %rd673, %rd660;
$L__tmp874:
.loc 11 0 5752
mov.b64 %rd674, %rd662;
$L__tmp875:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp876;
$L__tmp876:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd675, %rd673, 3;
add.s64 %rd676, %rd674, %rd675;
st.u64 [%rd672+8], %rd676;
$L__tmp877:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd677, [%SP+688];
mov.b64 %rd678, %rd660;
$L__tmp878:
.loc 11 0 5765
mov.b64 %rd679, %rd663;
$L__tmp879:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp880;
$L__tmp880:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd680, %rd678, 3;
add.s64 %rd681, %rd679, %rd680;
st.u64 [%rd677+16], %rd681;
$L__tmp881:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd682, [%SP+688];
mov.b64 %rd683, %rd660;
$L__tmp882:
.loc 11 0 5778
mov.b64 %rd684, %rd664;
$L__tmp883:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp884;
$L__tmp884:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd685, %rd683, 1;
add.s64 %rd686, %rd684, %rd685;
st.u64 [%rd682+24], %rd686;
$L__tmp885:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd687, [%SP+688];
mov.b64 %rd688, %rd660;
$L__tmp886:
.loc 11 0 5801
mov.b64 %rd689, %rd665;
$L__tmp887:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp888;
$L__tmp888:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd690, %rd688, 2;
add.s64 %rd691, %rd689, %rd690;
st.u64 [%rd687+32], %rd691;
$L__tmp889:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd692, [%SP+688];
ld.u64 %rd693, [%SP+696];
mov.b64 %rd694, %rd660;
$L__tmp890:
.loc 11 0 5822
mov.b64 %rd695, %rd693;
st.u64 [%SP+680], %rd695;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp891;
$L__tmp891:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd696, [%SP+680];
shl.b64 %rd697, %rd694, 3;
add.s64 %rd698, %rd696, %rd697;
st.u64 [%rd692+40], %rd698;
$L__tmp892:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd699, [%SP+712];
setp.ne.s64 %p78, %rd699, 0;
not.pred %p79, %p78;
not.pred %p80, %p79;
@%p80 bra $L__BB6_54;
bra.uni $L__BB6_53;
$L__BB6_53:
mov.u32 %r56, 0;
mov.b32 %r57, %r56;
bra.uni $L__BB6_54;
$L__BB6_54:
ld.u64 %rd700, [%SP+720];
ld.u64 %rd701, [%SP+728];
ld.u64 %rd702, [%SP+736];
ld.u64 %rd703, [%SP+744];
ld.u64 %rd704, [%SP+752];
ld.u64 %rd705, [%SP+760];
$L__tmp893:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 41
st.u64 [%SP+2248], %rd705;
st.u64 [%SP+2240], %rd704;
st.u64 [%SP+2232], %rd703;
st.u64 [%SP+2224], %rd702;
st.u64 [%SP+2216], %rd701;
st.u64 [%SP+2208], %rd700;
ld.u64 %rd706, [%SP+2224];
st.u64 [%SP+1768], %rd706;
add.u64 %rd707, %SP, 1768;
mov.b64 %rd708, %rd707;
st.u64 [%SP+768], %rd708;
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 41
bra.uni $L__tmp894;
$L__tmp894:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
ld.u64 %rd709, [%SP+768];
setp.ne.s64 %p81, %rd709, 0;
not.pred %p82, %p81;
not.pred %p83, %p82;
@%p83 bra $L__BB6_56;
bra.uni $L__BB6_55;
$L__BB6_55:
mov.u32 %r58, 0;
mov.b32 %r59, %r58;
bra.uni $L__BB6_56;
$L__BB6_56:
ld.u64 %rd710, [%SP+768];
ld.u64 %rd711, [%rd710];
mov.b64 %rd712, %rd711;
$L__tmp895:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 41
ld.f64 %fd16, [%rd712];
mul.f64 %fd17, %fd6, %fd16;
sub.f64 %fd18, %fd5, %fd17;
st.f64 [%SP+1736], %fd18;
add.u64 %rd713, %SP, 1736;
mov.b64 %rd13, %rd713;
add.u64 %rd714, %SP, 1440;
mov.b64 %rd715, %rd714;
st.u64 [%SP+808], %rd715;
mov.b64 %rd14, %rd1;
$L__tmp896:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 5
bra.uni $L__tmp897;
$L__tmp897:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6763
ld.u64 %rd716, [%SP+808];
mov.b64 %rd717, %rd716;
st.u64 [%SP+800], %rd717;
mov.b64 %rd718, %rd14;
$L__tmp898:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6763
bra.uni $L__tmp899;
$L__tmp899:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd719, [%SP+800];
setp.ne.s64 %p84, %rd719, 0;
not.pred %p85, %p84;
not.pred %p86, %p85;
@%p86 bra $L__BB6_58;
bra.uni $L__BB6_57;
$L__BB6_57:
mov.u32 %r60, 0;
mov.b32 %r61, %r60;
bra.uni $L__BB6_58;
$L__tmp900:
$L__BB6_58:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6782
ld.u64 %rd720, [%SP+808];
ld.u64 %rd721, [%rd720+24];
ld.u64 %rd722, [%SP+808];
ld.u64 %rd723, [%rd722+32];
ld.u64 %rd724, [%SP+808];
ld.u64 %rd725, [%rd724+40];
ld.u64 %rd726, [%SP+808];
ld.u64 %rd727, [%rd726+48];
ld.u64 %rd728, [%SP+808];
ld.u64 %rd729, [%rd728+56];
ld.u64 %rd730, [%SP+808];
ld.u64 %rd731, [%rd730+64];
add.u64 %rd732, %SP, 816;
mov.b64 %rd733, %rd732;
st.u64 [%SP+784], %rd733;
mov.b64 %rd734, %rd14;
$L__tmp901:
.loc 11 0 6782
mov.b64 %rd735, %rd721;
$L__tmp902:
mov.b64 %rd736, %rd723;
$L__tmp903:
mov.b64 %rd737, %rd725;
$L__tmp904:
mov.b64 %rd738, %rd727;
$L__tmp905:
mov.b64 %rd739, %rd729;
$L__tmp906:
mov.b64 %rd740, %rd731;
st.u64 [%SP+792], %rd740;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6782
bra.uni $L__tmp907;
$L__tmp907:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd741, [%SP+784];
$L__tmp908:
.loc 11 0 5739
mov.b64 %rd742, %rd734;
$L__tmp909:
mov.b64 %rd743, %rd735;
$L__tmp910:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp911;
$L__tmp911:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd744, %rd742, 3;
add.s64 %rd745, %rd743, %rd744;
st.u64 [%rd741], %rd745;
$L__tmp912:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd746, [%SP+784];
mov.b64 %rd747, %rd734;
$L__tmp913:
.loc 11 0 5752
mov.b64 %rd748, %rd736;
$L__tmp914:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp915;
$L__tmp915:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd749, %rd747, 3;
add.s64 %rd750, %rd748, %rd749;
st.u64 [%rd746+8], %rd750;
$L__tmp916:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd751, [%SP+784];
mov.b64 %rd752, %rd734;
$L__tmp917:
.loc 11 0 5765
mov.b64 %rd753, %rd737;
$L__tmp918:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp919;
$L__tmp919:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd754, %rd752, 3;
add.s64 %rd755, %rd753, %rd754;
st.u64 [%rd751+16], %rd755;
$L__tmp920:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd756, [%SP+784];
mov.b64 %rd757, %rd734;
$L__tmp921:
.loc 11 0 5778
mov.b64 %rd758, %rd738;
$L__tmp922:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp923;
$L__tmp923:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd759, %rd757, 1;
add.s64 %rd760, %rd758, %rd759;
st.u64 [%rd756+24], %rd760;
$L__tmp924:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd761, [%SP+784];
mov.b64 %rd762, %rd734;
$L__tmp925:
.loc 11 0 5801
mov.b64 %rd763, %rd739;
$L__tmp926:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp927;
$L__tmp927:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd764, %rd762, 2;
add.s64 %rd765, %rd763, %rd764;
st.u64 [%rd761+32], %rd765;
$L__tmp928:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd766, [%SP+784];
ld.u64 %rd767, [%SP+792];
mov.b64 %rd768, %rd734;
$L__tmp929:
.loc 11 0 5822
mov.b64 %rd769, %rd767;
st.u64 [%SP+776], %rd769;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp930;
$L__tmp930:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd770, [%SP+776];
shl.b64 %rd771, %rd768, 3;
add.s64 %rd772, %rd770, %rd771;
st.u64 [%rd766+40], %rd772;
$L__tmp931:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6782
ld.u64 %rd773, [%SP+808];
setp.ne.s64 %p87, %rd773, 0;
not.pred %p88, %p87;
not.pred %p89, %p88;
@%p89 bra $L__BB6_60;
bra.uni $L__BB6_59;
$L__BB6_59:
mov.u32 %r62, 0;
mov.b32 %r63, %r62;
bra.uni $L__BB6_60;
$L__BB6_60:
ld.u64 %rd774, [%SP+816];
ld.u64 %rd775, [%SP+824];
ld.u64 %rd776, [%SP+832];
ld.u64 %rd777, [%SP+840];
ld.u64 %rd778, [%SP+848];
ld.u64 %rd779, [%SP+856];
$L__tmp932:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 5
st.u64 [%SP+2296], %rd779;
st.u64 [%SP+2288], %rd778;
st.u64 [%SP+2280], %rd777;
st.u64 [%SP+2272], %rd776;
st.u64 [%SP+2264], %rd775;
st.u64 [%SP+2256], %rd774;
ld.u64 %rd780, [%SP+2264];
st.u64 [%SP+1728], %rd780;
add.u64 %rd781, %SP, 1728;
mov.b64 %rd782, %rd781;
st.u64 [%SP+864], %rd782;
mov.b64 %rd15, %rd13;
$L__tmp933:
//test_v7_cuda.cu:71 r[i].y = a[i].z * b[i].x - a[i].x * b[i].z;
.loc 18 71 5
bra.uni $L__tmp934;
$L__tmp934:
//soa_v7.h:35 SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return val_ = v; }
.loc 12 35 120
ld.u64 %rd783, [%SP+864];
setp.ne.s64 %p90, %rd783, 0;
not.pred %p91, %p90;
not.pred %p92, %p91;
@%p92 bra $L__BB6_62;
bra.uni $L__BB6_61;
$L__BB6_61:
mov.u32 %r64, 0;
mov.b32 %r65, %r64;
bra.uni $L__BB6_62;
$L__BB6_62:
ld.f64 %fd19, [%rd15];
ld.u64 %rd784, [%SP+864];
ld.u64 %rd785, [%rd784];
mov.b64 %rd786, %rd785;
st.f64 [%rd786], %fd19;
mov.b64 %rd787, %rd786;
add.u64 %rd788, %SP, 1520;
mov.b64 %rd789, %rd788;
st.u64 [%SP+904], %rd789;
mov.b64 %rd16, %rd1;
$L__tmp935:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 14
bra.uni $L__tmp936;
$L__tmp936:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd790, [%SP+904];
mov.b64 %rd791, %rd790;
st.u64 [%SP+896], %rd791;
mov.b64 %rd792, %rd16;
$L__tmp937:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp938;
$L__tmp938:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd793, [%SP+896];
setp.ne.s64 %p93, %rd793, 0;
not.pred %p94, %p93;
not.pred %p95, %p94;
@%p95 bra $L__BB6_64;
bra.uni $L__BB6_63;
$L__BB6_63:
mov.u32 %r66, 0;
mov.b32 %r67, %r66;
bra.uni $L__BB6_64;
$L__tmp939:
$L__BB6_64:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd794, [%SP+904];
ld.u64 %rd795, [%rd794+24];
ld.u64 %rd796, [%SP+904];
ld.u64 %rd797, [%rd796+32];
ld.u64 %rd798, [%SP+904];
ld.u64 %rd799, [%rd798+40];
ld.u64 %rd800, [%SP+904];
ld.u64 %rd801, [%rd800+48];
ld.u64 %rd802, [%SP+904];
ld.u64 %rd803, [%rd802+56];
ld.u64 %rd804, [%SP+904];
ld.u64 %rd805, [%rd804+64];
add.u64 %rd806, %SP, 912;
mov.b64 %rd807, %rd806;
st.u64 [%SP+880], %rd807;
mov.b64 %rd808, %rd16;
$L__tmp940:
.loc 11 0 6996
mov.b64 %rd809, %rd795;
$L__tmp941:
mov.b64 %rd810, %rd797;
$L__tmp942:
mov.b64 %rd811, %rd799;
$L__tmp943:
mov.b64 %rd812, %rd801;
$L__tmp944:
mov.b64 %rd813, %rd803;
$L__tmp945:
mov.b64 %rd814, %rd805;
st.u64 [%SP+888], %rd814;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp946;
$L__tmp946:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd815, [%SP+880];
$L__tmp947:
.loc 11 0 5739
mov.b64 %rd816, %rd808;
$L__tmp948:
mov.b64 %rd817, %rd809;
$L__tmp949:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp950;
$L__tmp950:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd818, %rd816, 3;
add.s64 %rd819, %rd817, %rd818;
st.u64 [%rd815], %rd819;
$L__tmp951:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd820, [%SP+880];
mov.b64 %rd821, %rd808;
$L__tmp952:
.loc 11 0 5752
mov.b64 %rd822, %rd810;
$L__tmp953:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp954;
$L__tmp954:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd823, %rd821, 3;
add.s64 %rd824, %rd822, %rd823;
st.u64 [%rd820+8], %rd824;
$L__tmp955:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd825, [%SP+880];
mov.b64 %rd826, %rd808;
$L__tmp956:
.loc 11 0 5765
mov.b64 %rd827, %rd811;
$L__tmp957:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp958;
$L__tmp958:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd828, %rd826, 3;
add.s64 %rd829, %rd827, %rd828;
st.u64 [%rd825+16], %rd829;
$L__tmp959:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd830, [%SP+880];
mov.b64 %rd831, %rd808;
$L__tmp960:
.loc 11 0 5778
mov.b64 %rd832, %rd812;
$L__tmp961:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp962;
$L__tmp962:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd833, %rd831, 1;
add.s64 %rd834, %rd832, %rd833;
st.u64 [%rd830+24], %rd834;
$L__tmp963:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd835, [%SP+880];
mov.b64 %rd836, %rd808;
$L__tmp964:
.loc 11 0 5801
mov.b64 %rd837, %rd813;
$L__tmp965:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp966;
$L__tmp966:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd838, %rd836, 2;
add.s64 %rd839, %rd837, %rd838;
st.u64 [%rd835+32], %rd839;
$L__tmp967:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd840, [%SP+880];
ld.u64 %rd841, [%SP+888];
mov.b64 %rd842, %rd808;
$L__tmp968:
.loc 11 0 5822
mov.b64 %rd843, %rd841;
st.u64 [%SP+872], %rd843;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp969;
$L__tmp969:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd844, [%SP+872];
shl.b64 %rd845, %rd842, 3;
add.s64 %rd846, %rd844, %rd845;
st.u64 [%rd840+40], %rd846;
$L__tmp970:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd847, [%SP+904];
setp.ne.s64 %p96, %rd847, 0;
not.pred %p97, %p96;
not.pred %p98, %p97;
@%p98 bra $L__BB6_66;
bra.uni $L__BB6_65;
$L__BB6_65:
mov.u32 %r68, 0;
mov.b32 %r69, %r68;
bra.uni $L__BB6_66;
$L__BB6_66:
ld.u64 %rd848, [%SP+912];
ld.u64 %rd849, [%SP+920];
ld.u64 %rd850, [%SP+928];
ld.u64 %rd851, [%SP+936];
ld.u64 %rd852, [%SP+944];
ld.u64 %rd853, [%SP+952];
$L__tmp971:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 14
st.u64 [%SP+2344], %rd853;
st.u64 [%SP+2336], %rd852;
st.u64 [%SP+2328], %rd851;
st.u64 [%SP+2320], %rd850;
st.u64 [%SP+2312], %rd849;
st.u64 [%SP+2304], %rd848;
ld.u64 %rd854, [%SP+2304];
st.u64 [%SP+1792], %rd854;
add.u64 %rd855, %SP, 1792;
mov.b64 %rd856, %rd855;
st.u64 [%SP+960], %rd856;
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 14
bra.uni $L__tmp972;
$L__tmp972:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
ld.u64 %rd857, [%SP+960];
setp.ne.s64 %p99, %rd857, 0;
not.pred %p100, %p99;
not.pred %p101, %p100;
@%p101 bra $L__BB6_68;
bra.uni $L__BB6_67;
$L__BB6_67:
mov.u32 %r70, 0;
mov.b32 %r71, %r70;
bra.uni $L__BB6_68;
$L__BB6_68:
ld.u64 %rd858, [%SP+960];
ld.u64 %rd859, [%rd858];
mov.b64 %rd860, %rd859;
$L__tmp973:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 14
ld.f64 %fd7, [%rd860];
add.u64 %rd861, %SP, 1600;
mov.b64 %rd862, %rd861;
st.u64 [%SP+1000], %rd862;
mov.b64 %rd17, %rd1;
$L__tmp974:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 23
bra.uni $L__tmp975;
$L__tmp975:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd863, [%SP+1000];
mov.b64 %rd864, %rd863;
st.u64 [%SP+992], %rd864;
mov.b64 %rd865, %rd17;
$L__tmp976:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp977;
$L__tmp977:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd866, [%SP+992];
setp.ne.s64 %p102, %rd866, 0;
not.pred %p103, %p102;
not.pred %p104, %p103;
@%p104 bra $L__BB6_70;
bra.uni $L__BB6_69;
$L__BB6_69:
mov.u32 %r72, 0;
mov.b32 %r73, %r72;
bra.uni $L__BB6_70;
$L__tmp978:
$L__BB6_70:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd867, [%SP+1000];
ld.u64 %rd868, [%rd867+24];
ld.u64 %rd869, [%SP+1000];
ld.u64 %rd870, [%rd869+32];
ld.u64 %rd871, [%SP+1000];
ld.u64 %rd872, [%rd871+40];
ld.u64 %rd873, [%SP+1000];
ld.u64 %rd874, [%rd873+48];
ld.u64 %rd875, [%SP+1000];
ld.u64 %rd876, [%rd875+56];
ld.u64 %rd877, [%SP+1000];
ld.u64 %rd878, [%rd877+64];
add.u64 %rd879, %SP, 1008;
mov.b64 %rd880, %rd879;
st.u64 [%SP+976], %rd880;
mov.b64 %rd881, %rd17;
$L__tmp979:
.loc 11 0 6996
mov.b64 %rd882, %rd868;
$L__tmp980:
mov.b64 %rd883, %rd870;
$L__tmp981:
mov.b64 %rd884, %rd872;
$L__tmp982:
mov.b64 %rd885, %rd874;
$L__tmp983:
mov.b64 %rd886, %rd876;
$L__tmp984:
mov.b64 %rd887, %rd878;
st.u64 [%SP+984], %rd887;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp985;
$L__tmp985:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd888, [%SP+976];
$L__tmp986:
.loc 11 0 5739
mov.b64 %rd889, %rd881;
$L__tmp987:
mov.b64 %rd890, %rd882;
$L__tmp988:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp989;
$L__tmp989:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd891, %rd889, 3;
add.s64 %rd892, %rd890, %rd891;
st.u64 [%rd888], %rd892;
$L__tmp990:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd893, [%SP+976];
mov.b64 %rd894, %rd881;
$L__tmp991:
.loc 11 0 5752
mov.b64 %rd895, %rd883;
$L__tmp992:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp993;
$L__tmp993:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd896, %rd894, 3;
add.s64 %rd897, %rd895, %rd896;
st.u64 [%rd893+8], %rd897;
$L__tmp994:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd898, [%SP+976];
mov.b64 %rd899, %rd881;
$L__tmp995:
.loc 11 0 5765
mov.b64 %rd900, %rd884;
$L__tmp996:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp997;
$L__tmp997:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd901, %rd899, 3;
add.s64 %rd902, %rd900, %rd901;
st.u64 [%rd898+16], %rd902;
$L__tmp998:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd903, [%SP+976];
mov.b64 %rd904, %rd881;
$L__tmp999:
.loc 11 0 5778
mov.b64 %rd905, %rd885;
$L__tmp1000:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp1001;
$L__tmp1001:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd906, %rd904, 1;
add.s64 %rd907, %rd905, %rd906;
st.u64 [%rd903+24], %rd907;
$L__tmp1002:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd908, [%SP+976];
mov.b64 %rd909, %rd881;
$L__tmp1003:
.loc 11 0 5801
mov.b64 %rd910, %rd886;
$L__tmp1004:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp1005;
$L__tmp1005:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd911, %rd909, 2;
add.s64 %rd912, %rd910, %rd911;
st.u64 [%rd908+32], %rd912;
$L__tmp1006:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd913, [%SP+976];
ld.u64 %rd914, [%SP+984];
mov.b64 %rd915, %rd881;
$L__tmp1007:
.loc 11 0 5822
mov.b64 %rd916, %rd914;
st.u64 [%SP+968], %rd916;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp1008;
$L__tmp1008:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd917, [%SP+968];
shl.b64 %rd918, %rd915, 3;
add.s64 %rd919, %rd917, %rd918;
st.u64 [%rd913+40], %rd919;
$L__tmp1009:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd920, [%SP+1000];
setp.ne.s64 %p105, %rd920, 0;
not.pred %p106, %p105;
not.pred %p107, %p106;
@%p107 bra $L__BB6_72;
bra.uni $L__BB6_71;
$L__BB6_71:
mov.u32 %r74, 0;
mov.b32 %r75, %r74;
bra.uni $L__BB6_72;
$L__BB6_72:
ld.u64 %rd921, [%SP+1008];
ld.u64 %rd922, [%SP+1016];
ld.u64 %rd923, [%SP+1024];
ld.u64 %rd924, [%SP+1032];
ld.u64 %rd925, [%SP+1040];
ld.u64 %rd926, [%SP+1048];
$L__tmp1010:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 23
st.u64 [%SP+2392], %rd926;
st.u64 [%SP+2384], %rd925;
st.u64 [%SP+2376], %rd924;
st.u64 [%SP+2368], %rd923;
st.u64 [%SP+2360], %rd922;
st.u64 [%SP+2352], %rd921;
ld.u64 %rd927, [%SP+2360];
st.u64 [%SP+1800], %rd927;
add.u64 %rd928, %SP, 1800;
mov.b64 %rd929, %rd928;
st.u64 [%SP+1056], %rd929;
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 23
bra.uni $L__tmp1011;
$L__tmp1011:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
ld.u64 %rd930, [%SP+1056];
setp.ne.s64 %p108, %rd930, 0;
not.pred %p109, %p108;
not.pred %p110, %p109;
@%p110 bra $L__BB6_74;
bra.uni $L__BB6_73;
$L__BB6_73:
mov.u32 %r76, 0;
mov.b32 %r77, %r76;
bra.uni $L__BB6_74;
$L__BB6_74:
ld.u64 %rd931, [%SP+1056];
ld.u64 %rd932, [%rd931];
mov.b64 %rd933, %rd932;
$L__tmp1012:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 23
ld.f64 %fd20, [%rd933];
mul.f64 %fd8, %fd7, %fd20;
add.u64 %rd934, %SP, 1520;
mov.b64 %rd935, %rd934;
st.u64 [%SP+1096], %rd935;
mov.b64 %rd18, %rd1;
$L__tmp1013:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 32
bra.uni $L__tmp1014;
$L__tmp1014:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd936, [%SP+1096];
mov.b64 %rd937, %rd936;
st.u64 [%SP+1088], %rd937;
mov.b64 %rd938, %rd18;
$L__tmp1015:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp1016;
$L__tmp1016:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd939, [%SP+1088];
setp.ne.s64 %p111, %rd939, 0;
not.pred %p112, %p111;
not.pred %p113, %p112;
@%p113 bra $L__BB6_76;
bra.uni $L__BB6_75;
$L__BB6_75:
mov.u32 %r78, 0;
mov.b32 %r79, %r78;
bra.uni $L__BB6_76;
$L__tmp1017:
$L__BB6_76:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd940, [%SP+1096];
ld.u64 %rd941, [%rd940+24];
ld.u64 %rd942, [%SP+1096];
ld.u64 %rd943, [%rd942+32];
ld.u64 %rd944, [%SP+1096];
ld.u64 %rd945, [%rd944+40];
ld.u64 %rd946, [%SP+1096];
ld.u64 %rd947, [%rd946+48];
ld.u64 %rd948, [%SP+1096];
ld.u64 %rd949, [%rd948+56];
ld.u64 %rd950, [%SP+1096];
ld.u64 %rd951, [%rd950+64];
add.u64 %rd952, %SP, 1104;
mov.b64 %rd953, %rd952;
st.u64 [%SP+1072], %rd953;
mov.b64 %rd954, %rd18;
$L__tmp1018:
.loc 11 0 6996
mov.b64 %rd955, %rd941;
$L__tmp1019:
mov.b64 %rd956, %rd943;
$L__tmp1020:
mov.b64 %rd957, %rd945;
$L__tmp1021:
mov.b64 %rd958, %rd947;
$L__tmp1022:
mov.b64 %rd959, %rd949;
$L__tmp1023:
mov.b64 %rd960, %rd951;
st.u64 [%SP+1080], %rd960;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp1024;
$L__tmp1024:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd961, [%SP+1072];
$L__tmp1025:
.loc 11 0 5739
mov.b64 %rd962, %rd954;
$L__tmp1026:
mov.b64 %rd963, %rd955;
$L__tmp1027:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp1028;
$L__tmp1028:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd964, %rd962, 3;
add.s64 %rd965, %rd963, %rd964;
st.u64 [%rd961], %rd965;
$L__tmp1029:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd966, [%SP+1072];
mov.b64 %rd967, %rd954;
$L__tmp1030:
.loc 11 0 5752
mov.b64 %rd968, %rd956;
$L__tmp1031:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp1032;
$L__tmp1032:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd969, %rd967, 3;
add.s64 %rd970, %rd968, %rd969;
st.u64 [%rd966+8], %rd970;
$L__tmp1033:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd971, [%SP+1072];
mov.b64 %rd972, %rd954;
$L__tmp1034:
.loc 11 0 5765
mov.b64 %rd973, %rd957;
$L__tmp1035:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp1036;
$L__tmp1036:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd974, %rd972, 3;
add.s64 %rd975, %rd973, %rd974;
st.u64 [%rd971+16], %rd975;
$L__tmp1037:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd976, [%SP+1072];
mov.b64 %rd977, %rd954;
$L__tmp1038:
.loc 11 0 5778
mov.b64 %rd978, %rd958;
$L__tmp1039:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp1040;
$L__tmp1040:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd979, %rd977, 1;
add.s64 %rd980, %rd978, %rd979;
st.u64 [%rd976+24], %rd980;
$L__tmp1041:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd981, [%SP+1072];
mov.b64 %rd982, %rd954;
$L__tmp1042:
.loc 11 0 5801
mov.b64 %rd983, %rd959;
$L__tmp1043:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp1044;
$L__tmp1044:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd984, %rd982, 2;
add.s64 %rd985, %rd983, %rd984;
st.u64 [%rd981+32], %rd985;
$L__tmp1045:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd986, [%SP+1072];
ld.u64 %rd987, [%SP+1080];
mov.b64 %rd988, %rd954;
$L__tmp1046:
.loc 11 0 5822
mov.b64 %rd989, %rd987;
st.u64 [%SP+1064], %rd989;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp1047;
$L__tmp1047:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd990, [%SP+1064];
shl.b64 %rd991, %rd988, 3;
add.s64 %rd992, %rd990, %rd991;
st.u64 [%rd986+40], %rd992;
$L__tmp1048:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd993, [%SP+1096];
setp.ne.s64 %p114, %rd993, 0;
not.pred %p115, %p114;
not.pred %p116, %p115;
@%p116 bra $L__BB6_78;
bra.uni $L__BB6_77;
$L__BB6_77:
mov.u32 %r80, 0;
mov.b32 %r81, %r80;
bra.uni $L__BB6_78;
$L__BB6_78:
ld.u64 %rd994, [%SP+1104];
ld.u64 %rd995, [%SP+1112];
ld.u64 %rd996, [%SP+1120];
ld.u64 %rd997, [%SP+1128];
ld.u64 %rd998, [%SP+1136];
ld.u64 %rd999, [%SP+1144];
$L__tmp1049:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 32
st.u64 [%SP+2440], %rd999;
st.u64 [%SP+2432], %rd998;
st.u64 [%SP+2424], %rd997;
st.u64 [%SP+2416], %rd996;
st.u64 [%SP+2408], %rd995;
st.u64 [%SP+2400], %rd994;
ld.u64 %rd1000, [%SP+2408];
st.u64 [%SP+1808], %rd1000;
add.u64 %rd1001, %SP, 1808;
mov.b64 %rd1002, %rd1001;
st.u64 [%SP+1152], %rd1002;
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 32
bra.uni $L__tmp1050;
$L__tmp1050:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
ld.u64 %rd1003, [%SP+1152];
setp.ne.s64 %p117, %rd1003, 0;
not.pred %p118, %p117;
not.pred %p119, %p118;
@%p119 bra $L__BB6_80;
bra.uni $L__BB6_79;
$L__BB6_79:
mov.u32 %r82, 0;
mov.b32 %r83, %r82;
bra.uni $L__BB6_80;
$L__BB6_80:
ld.u64 %rd1004, [%SP+1152];
ld.u64 %rd1005, [%rd1004];
mov.b64 %rd1006, %rd1005;
$L__tmp1051:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 32
ld.f64 %fd9, [%rd1006];
add.u64 %rd1007, %SP, 1600;
mov.b64 %rd1008, %rd1007;
st.u64 [%SP+1192], %rd1008;
mov.b64 %rd19, %rd1;
$L__tmp1052:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 41
bra.uni $L__tmp1053;
$L__tmp1053:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
ld.u64 %rd1009, [%SP+1192];
mov.b64 %rd1010, %rd1009;
st.u64 [%SP+1184], %rd1010;
mov.b64 %rd1011, %rd19;
$L__tmp1054:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6977
bra.uni $L__tmp1055;
$L__tmp1055:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd1012, [%SP+1184];
setp.ne.s64 %p120, %rd1012, 0;
not.pred %p121, %p120;
not.pred %p122, %p121;
@%p122 bra $L__BB6_82;
bra.uni $L__BB6_81;
$L__BB6_81:
mov.u32 %r84, 0;
mov.b32 %r85, %r84;
bra.uni $L__BB6_82;
$L__tmp1056:
$L__BB6_82:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd1013, [%SP+1192];
ld.u64 %rd1014, [%rd1013+24];
ld.u64 %rd1015, [%SP+1192];
ld.u64 %rd1016, [%rd1015+32];
ld.u64 %rd1017, [%SP+1192];
ld.u64 %rd1018, [%rd1017+40];
ld.u64 %rd1019, [%SP+1192];
ld.u64 %rd1020, [%rd1019+48];
ld.u64 %rd1021, [%SP+1192];
ld.u64 %rd1022, [%rd1021+56];
ld.u64 %rd1023, [%SP+1192];
ld.u64 %rd1024, [%rd1023+64];
add.u64 %rd1025, %SP, 1200;
mov.b64 %rd1026, %rd1025;
st.u64 [%SP+1168], %rd1026;
mov.b64 %rd1027, %rd19;
$L__tmp1057:
.loc 11 0 6996
mov.b64 %rd1028, %rd1014;
$L__tmp1058:
mov.b64 %rd1029, %rd1016;
$L__tmp1059:
mov.b64 %rd1030, %rd1018;
$L__tmp1060:
mov.b64 %rd1031, %rd1020;
$L__tmp1061:
mov.b64 %rd1032, %rd1022;
$L__tmp1062:
mov.b64 %rd1033, %rd1024;
st.u64 [%SP+1176], %rd1033;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
bra.uni $L__tmp1063;
$L__tmp1063:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd1034, [%SP+1168];
$L__tmp1064:
.loc 11 0 5739
mov.b64 %rd1035, %rd1027;
$L__tmp1065:
mov.b64 %rd1036, %rd1028;
$L__tmp1066:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp1067;
$L__tmp1067:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd1037, %rd1035, 3;
add.s64 %rd1038, %rd1036, %rd1037;
st.u64 [%rd1034], %rd1038;
$L__tmp1068:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd1039, [%SP+1168];
mov.b64 %rd1040, %rd1027;
$L__tmp1069:
.loc 11 0 5752
mov.b64 %rd1041, %rd1029;
$L__tmp1070:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp1071;
$L__tmp1071:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd1042, %rd1040, 3;
add.s64 %rd1043, %rd1041, %rd1042;
st.u64 [%rd1039+8], %rd1043;
$L__tmp1072:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd1044, [%SP+1168];
mov.b64 %rd1045, %rd1027;
$L__tmp1073:
.loc 11 0 5765
mov.b64 %rd1046, %rd1030;
$L__tmp1074:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp1075;
$L__tmp1075:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd1047, %rd1045, 3;
add.s64 %rd1048, %rd1046, %rd1047;
st.u64 [%rd1044+16], %rd1048;
$L__tmp1076:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd1049, [%SP+1168];
mov.b64 %rd1050, %rd1027;
$L__tmp1077:
.loc 11 0 5778
mov.b64 %rd1051, %rd1031;
$L__tmp1078:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp1079;
$L__tmp1079:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd1052, %rd1050, 1;
add.s64 %rd1053, %rd1051, %rd1052;
st.u64 [%rd1049+24], %rd1053;
$L__tmp1080:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd1054, [%SP+1168];
mov.b64 %rd1055, %rd1027;
$L__tmp1081:
.loc 11 0 5801
mov.b64 %rd1056, %rd1032;
$L__tmp1082:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp1083;
$L__tmp1083:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd1057, %rd1055, 2;
add.s64 %rd1058, %rd1056, %rd1057;
st.u64 [%rd1054+32], %rd1058;
$L__tmp1084:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd1059, [%SP+1168];
ld.u64 %rd1060, [%SP+1176];
mov.b64 %rd1061, %rd1027;
$L__tmp1085:
.loc 11 0 5822
mov.b64 %rd1062, %rd1060;
st.u64 [%SP+1160], %rd1062;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp1086;
$L__tmp1086:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd1063, [%SP+1160];
shl.b64 %rd1064, %rd1061, 3;
add.s64 %rd1065, %rd1063, %rd1064;
st.u64 [%rd1059+40], %rd1065;
$L__tmp1087:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6996
ld.u64 %rd1066, [%SP+1192];
setp.ne.s64 %p123, %rd1066, 0;
not.pred %p124, %p123;
not.pred %p125, %p124;
@%p125 bra $L__BB6_84;
bra.uni $L__BB6_83;
$L__BB6_83:
mov.u32 %r86, 0;
mov.b32 %r87, %r86;
bra.uni $L__BB6_84;
$L__BB6_84:
ld.u64 %rd1067, [%SP+1200];
ld.u64 %rd1068, [%SP+1208];
ld.u64 %rd1069, [%SP+1216];
ld.u64 %rd1070, [%SP+1224];
ld.u64 %rd1071, [%SP+1232];
ld.u64 %rd1072, [%SP+1240];
$L__tmp1088:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 41
st.u64 [%SP+2488], %rd1072;
st.u64 [%SP+2480], %rd1071;
st.u64 [%SP+2472], %rd1070;
st.u64 [%SP+2464], %rd1069;
st.u64 [%SP+2456], %rd1068;
st.u64 [%SP+2448], %rd1067;
ld.u64 %rd1073, [%SP+2448];
st.u64 [%SP+1816], %rd1073;
add.u64 %rd1074, %SP, 1816;
mov.b64 %rd1075, %rd1074;
st.u64 [%SP+1248], %rd1075;
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 41
bra.uni $L__tmp1089;
$L__tmp1089:
//soa_v7.h:31 SOA_HOST_DEVICE_INLINE operator const T&() const { return val_; }
.loc 12 31 119
ld.u64 %rd1076, [%SP+1248];
setp.ne.s64 %p126, %rd1076, 0;
not.pred %p127, %p126;
not.pred %p128, %p127;
@%p128 bra $L__BB6_86;
bra.uni $L__BB6_85;
$L__BB6_85:
mov.u32 %r88, 0;
mov.b32 %r89, %r88;
bra.uni $L__BB6_86;
$L__BB6_86:
ld.u64 %rd1077, [%SP+1248];
ld.u64 %rd1078, [%rd1077];
mov.b64 %rd1079, %rd1078;
$L__tmp1090:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 41
ld.f64 %fd21, [%rd1079];
mul.f64 %fd22, %fd9, %fd21;
sub.f64 %fd23, %fd8, %fd22;
st.f64 [%SP+1784], %fd23;
add.u64 %rd1080, %SP, 1784;
mov.b64 %rd20, %rd1080;
add.u64 %rd1081, %SP, 1440;
mov.b64 %rd1082, %rd1081;
st.u64 [%SP+1288], %rd1082;
mov.b64 %rd21, %rd1;
$L__tmp1091:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 5
bra.uni $L__tmp1092;
$L__tmp1092:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6763
ld.u64 %rd1083, [%SP+1288];
mov.b64 %rd1084, %rd1083;
st.u64 [%SP+1280], %rd1084;
mov.b64 %rd1085, %rd21;
$L__tmp1093:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6763
bra.uni $L__tmp1094;
$L__tmp1094:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 9055
ld.u64 %rd1086, [%SP+1280];
setp.ne.s64 %p129, %rd1086, 0;
not.pred %p130, %p129;
not.pred %p131, %p130;
@%p131 bra $L__BB6_88;
bra.uni $L__BB6_87;
$L__BB6_87:
mov.u32 %r90, 0;
mov.b32 %r91, %r90;
bra.uni $L__BB6_88;
$L__tmp1095:
$L__BB6_88:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6782
ld.u64 %rd1087, [%SP+1288];
ld.u64 %rd1088, [%rd1087+24];
ld.u64 %rd1089, [%SP+1288];
ld.u64 %rd1090, [%rd1089+32];
ld.u64 %rd1091, [%SP+1288];
ld.u64 %rd1092, [%rd1091+40];
ld.u64 %rd1093, [%SP+1288];
ld.u64 %rd1094, [%rd1093+48];
ld.u64 %rd1095, [%SP+1288];
ld.u64 %rd1096, [%rd1095+56];
ld.u64 %rd1097, [%SP+1288];
ld.u64 %rd1098, [%rd1097+64];
add.u64 %rd1099, %SP, 1296;
mov.b64 %rd1100, %rd1099;
st.u64 [%SP+1264], %rd1100;
mov.b64 %rd1101, %rd21;
$L__tmp1096:
.loc 11 0 6782
mov.b64 %rd1102, %rd1088;
$L__tmp1097:
mov.b64 %rd1103, %rd1090;
$L__tmp1098:
mov.b64 %rd1104, %rd1092;
$L__tmp1099:
mov.b64 %rd1105, %rd1094;
$L__tmp1100:
mov.b64 %rd1106, %rd1096;
$L__tmp1101:
mov.b64 %rd1107, %rd1098;
st.u64 [%SP+1272], %rd1107;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6782
bra.uni $L__tmp1102;
$L__tmp1102:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
ld.u64 %rd1108, [%SP+1264];
$L__tmp1103:
.loc 11 0 5739
mov.b64 %rd1109, %rd1101;
$L__tmp1104:
mov.b64 %rd1110, %rd1102;
$L__tmp1105:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5739
bra.uni $L__tmp1106;
$L__tmp1106:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd1111, %rd1109, 3;
add.s64 %rd1112, %rd1110, %rd1111;
st.u64 [%rd1108], %rd1112;
$L__tmp1107:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
ld.u64 %rd1113, [%SP+1264];
mov.b64 %rd1114, %rd1101;
$L__tmp1108:
.loc 11 0 5752
mov.b64 %rd1115, %rd1103;
$L__tmp1109:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5752
bra.uni $L__tmp1110;
$L__tmp1110:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd1116, %rd1114, 3;
add.s64 %rd1117, %rd1115, %rd1116;
st.u64 [%rd1113+8], %rd1117;
$L__tmp1111:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
ld.u64 %rd1118, [%SP+1264];
mov.b64 %rd1119, %rd1101;
$L__tmp1112:
.loc 11 0 5765
mov.b64 %rd1120, %rd1104;
$L__tmp1113:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5765
bra.uni $L__tmp1114;
$L__tmp1114:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd1121, %rd1119, 3;
add.s64 %rd1122, %rd1120, %rd1121;
st.u64 [%rd1118+16], %rd1122;
$L__tmp1115:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
ld.u64 %rd1123, [%SP+1264];
mov.b64 %rd1124, %rd1101;
$L__tmp1116:
.loc 11 0 5778
mov.b64 %rd1125, %rd1105;
$L__tmp1117:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5778
bra.uni $L__tmp1118;
$L__tmp1118:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd1126, %rd1124, 1;
add.s64 %rd1127, %rd1125, %rd1126;
st.u64 [%rd1123+24], %rd1127;
$L__tmp1119:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
ld.u64 %rd1128, [%SP+1264];
mov.b64 %rd1129, %rd1101;
$L__tmp1120:
.loc 11 0 5801
mov.b64 %rd1130, %rd1106;
$L__tmp1121:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5801
bra.uni $L__tmp1122;
$L__tmp1122:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
shl.b64 %rd1131, %rd1129, 2;
add.s64 %rd1132, %rd1130, %rd1131;
st.u64 [%rd1128+32], %rd1132;
$L__tmp1123:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
ld.u64 %rd1133, [%SP+1264];
ld.u64 %rd1134, [%SP+1272];
mov.b64 %rd1135, %rd1101;
$L__tmp1124:
.loc 11 0 5822
mov.b64 %rd1136, %rd1134;
st.u64 [%SP+1256], %rd1136;
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 5822
bra.uni $L__tmp1125;
$L__tmp1125:
//soa_v7.h:29 SOA_HOST_DEVICE_INLINE SoAValue(size_t i, T * col): val_(col[i]) {}
.loc 12 29 120
ld.u64 %rd1137, [%SP+1256];
shl.b64 %rd1138, %rd1135, 3;
add.s64 %rd1139, %rd1137, %rd1138;
st.u64 [%rd1133+40], %rd1139;
$L__tmp1126:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 6782
ld.u64 %rd1140, [%SP+1288];
setp.ne.s64 %p132, %rd1140, 0;
not.pred %p133, %p132;
not.pred %p134, %p133;
@%p134 bra $L__BB6_90;
bra.uni $L__BB6_89;
$L__BB6_89:
mov.u32 %r92, 0;
mov.b32 %r93, %r92;
bra.uni $L__BB6_90;
$L__BB6_90:
ld.u64 %rd1141, [%SP+1296];
ld.u64 %rd1142, [%SP+1304];
ld.u64 %rd1143, [%SP+1312];
ld.u64 %rd1144, [%SP+1320];
ld.u64 %rd1145, [%SP+1328];
ld.u64 %rd1146, [%SP+1336];
$L__tmp1127:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 5
st.u64 [%SP+2536], %rd1146;
st.u64 [%SP+2528], %rd1145;
st.u64 [%SP+2520], %rd1144;
st.u64 [%SP+2512], %rd1143;
st.u64 [%SP+2504], %rd1142;
st.u64 [%SP+2496], %rd1141;
ld.u64 %rd1147, [%SP+2512];
st.u64 [%SP+1776], %rd1147;
add.u64 %rd1148, %SP, 1776;
mov.b64 %rd1149, %rd1148;
st.u64 [%SP+1344], %rd1149;
mov.b64 %rd22, %rd20;
$L__tmp1128:
//test_v7_cuda.cu:72 r[i].z = a[i].x * b[i].y - a[i].y * b[i].x;
.loc 18 72 5
bra.uni $L__tmp1129;
$L__tmp1129:
//soa_v7.h:35 SOA_HOST_DEVICE_INLINE T& operator= (const T2& v) { return val_ = v; }
.loc 12 35 120
ld.u64 %rd1150, [%SP+1344];
setp.ne.s64 %p135, %rd1150, 0;
not.pred %p136, %p135;
not.pred %p137, %p136;
@%p137 bra $L__BB6_92;
bra.uni $L__BB6_91;
$L__BB6_91:
mov.u32 %r94, 0;
mov.b32 %r95, %r94;
bra.uni $L__BB6_92;
$L__BB6_92:
ld.f64 %fd24, [%rd22];
ld.u64 %rd1151, [%SP+1344];
ld.u64 %rd1152, [%rd1151];
mov.b64 %rd1153, %rd1152;
st.f64 [%rd1153], %fd24;
mov.b64 %rd1154, %rd1153;
$L__tmp1130:
//test_v7_cuda.cu:73 }
.loc 18 73 3
bra.uni $L__BB6_93;
$L__BB6_93:
ret;
$L__tmp1131:
$L__func_end6:
}
//-----------------------------------------------------------------------
// Kernel: handcraftedCrossProductSoA(testSoA::SoA r, const testSoA::SoA a,
//                                    const testSoA::SoA b, size_t nElements)
// (signature as echoed from test_v7_cuda.cu:76)
//
// Params:  param_0 = r, param_1 = a, param_2 = b (80-byte structs, by value),
//          param_3 = nElements (u64).
// Effect:  for i = ctaid.x * ntid.x + tid.x, when i < nElements, stores the
//          three components of the cross product a[i] x b[i] into r's
//          x/y/z columns as f64 values.
//
// Local depot layout (360 bytes):
//   SP+0   .. SP+112 : one 8-byte slot per accessor call, holding the
//                      address of the struct copy being accessed
//   SP+120 .. SP+199 : copy of r (param_0)
//   SP+200 .. SP+279 : copy of a (param_1)
//   SP+280 .. SP+359 : copy of b (param_2)
// Column pointers used by this kernel inside each struct copy:
//   +24 = x, +32 = y, +40 = z; elements are 8 bytes apart (shl by 3).
//
// Each accessor is preceded by a "struct address != 0" check whose fall-through
// block only writes %r registers that are never read in this kernel.
//
// FIX(review): the first factor pair of r.x originally loaded b's column at
// +32 (y), giving a.y*b.y - a.z*b.y. A cross product needs a.y*b.z - a.z*b.y,
// so that load now uses +40 (z) and the accessor .loc column is 8282, the
// value used at the other const z() accessor sites. The echoed source line
// test_v7_cuda.cu:79 is kept verbatim below and still shows the typo; the
// .cu file needs the same correction or regenerated PTX will reintroduce it.
//-----------------------------------------------------------------------
.entry _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m(
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0[80],
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1[80],
.param .align 8 .b8 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2[80],
.param .u64 _ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3
)
{
.local .align 8 .b8 __local_depot7[360];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<48>;
.reg .b32 %r<36>;
.reg .f64 %fd<22>;
.reg .b64 %rd<153>;
//test_v7_cuda.cu:76 __global__ void handcraftedCrossProductSoA(testSoA::SoA r, const testSoA::SoA a, const testSoA::SoA b, size_t nElements) {
.loc 18 76 0
$L__func_begin7:
//test_v7_cuda.cu:76 __global__ void handcraftedCrossProductSoA(testSoA::SoA r, const testSoA::SoA a, const testSoA::SoA b, size_t nElements) {
.loc 18 76 0
// %SPL = local-space base of the depot, %SP = the same address converted by cvta.
mov.u64 %SPL, __local_depot7;
cvta.local.u64 %SP, %SPL;
// Load every 8-byte word of b (param_2), a (param_1) and r (param_0), plus nElements.
ld.param.u64 %rd23, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+8];
ld.param.u64 %rd24, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+16];
ld.param.u64 %rd25, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+24];
ld.param.u64 %rd26, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+32];
ld.param.u64 %rd27, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+40];
ld.param.u64 %rd28, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+48];
ld.param.u64 %rd29, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+56];
ld.param.u64 %rd30, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+64];
ld.param.u64 %rd31, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2+72];
ld.param.u64 %rd13, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+8];
ld.param.u64 %rd14, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+16];
ld.param.u64 %rd15, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+24];
ld.param.u64 %rd16, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+32];
ld.param.u64 %rd17, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+40];
ld.param.u64 %rd18, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+48];
ld.param.u64 %rd19, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+56];
ld.param.u64 %rd20, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+64];
ld.param.u64 %rd21, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1+72];
ld.param.u64 %rd3, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+8];
ld.param.u64 %rd4, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+16];
ld.param.u64 %rd5, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+24];
ld.param.u64 %rd6, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+32];
ld.param.u64 %rd7, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+40];
ld.param.u64 %rd8, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+48];
ld.param.u64 %rd9, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+56];
ld.param.u64 %rd10, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+64];
ld.param.u64 %rd11, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0+72];
ld.param.u64 %rd32, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_3];
ld.param.u64 %rd22, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_2];
ld.param.u64 %rd12, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_1];
ld.param.u64 %rd2, [_ZN71_GLOBAL__N__47_tmpxft_00003ae1_00000000_7_test_v7_cuda_cpp1_ii_e64742fe26handcraftedCrossProductSoAEN7testSoA3SoAES1_S1_m_param_0];
// Copy r into the depot at SP+120 .. SP+199.
st.u64 [%SP+192], %rd11;
st.u64 [%SP+184], %rd10;
st.u64 [%SP+176], %rd9;
st.u64 [%SP+168], %rd8;
st.u64 [%SP+160], %rd7;
st.u64 [%SP+152], %rd6;
st.u64 [%SP+144], %rd5;
st.u64 [%SP+136], %rd4;
st.u64 [%SP+128], %rd3;
st.u64 [%SP+120], %rd2;
// Copy a into the depot at SP+200 .. SP+279.
st.u64 [%SP+272], %rd21;
st.u64 [%SP+264], %rd20;
st.u64 [%SP+256], %rd19;
st.u64 [%SP+248], %rd18;
st.u64 [%SP+240], %rd17;
st.u64 [%SP+232], %rd16;
st.u64 [%SP+224], %rd15;
st.u64 [%SP+216], %rd14;
st.u64 [%SP+208], %rd13;
st.u64 [%SP+200], %rd12;
// Copy b into the depot at SP+280 .. SP+359.
st.u64 [%SP+352], %rd31;
st.u64 [%SP+344], %rd30;
st.u64 [%SP+336], %rd29;
st.u64 [%SP+328], %rd28;
st.u64 [%SP+320], %rd27;
st.u64 [%SP+312], %rd26;
st.u64 [%SP+304], %rd25;
st.u64 [%SP+296], %rd24;
st.u64 [%SP+288], %rd23;
st.u64 [%SP+280], %rd22;
$L__tmp1132:
//test_v7_cuda.cu:77 size_t i = blockIdx.x * blockDim.x + threadIdx.x;
.loc 18 77 14
// %rd1 = i; the sum is formed in 32 bits and then zero-extended to 64 bits.
mov.u32 %r1, %ctaid.x;
mov.u32 %r2, %ntid.x;
mul.lo.s32 %r3, %r1, %r2;
mov.u32 %r4, %tid.x;
add.s32 %r5, %r3, %r4;
cvt.u64.u32 %rd1, %r5;
$L__tmp1133:
//test_v7_cuda.cu:78 if (i >= nElements) return;
.loc 18 78 5
// Unsigned compare: threads with i >= nElements branch to the final ret.
setp.ge.u64 %p1, %rd1, %rd32;
not.pred %p2, %p1;
@%p2 bra $L__BB7_2;
bra.uni $L__BB7_1;
$L__BB7_1:
$L__tmp1134:
//test_v7_cuda.cu:78 if (i >= nElements) return;
.loc 18 78 25
bra.uni $L__BB7_33;
$L__tmp1135:
$L__BB7_2:
.loc 18 0 25
// ---- r.x : first factor a.y[i] -> %fd1 ----
add.u64 %rd33, %SP, 200;
mov.b64 %rd34, %rd33;
st.u64 [%SP+112], %rd34;
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i];
.loc 18 79 16
bra.uni $L__tmp1136;
$L__tmp1136:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 8155
ld.u64 %rd35, [%SP+112];
setp.ne.s64 %p3, %rd35, 0;
not.pred %p4, %p3;
not.pred %p5, %p4;
@%p5 bra $L__BB7_4;
bra.uni $L__BB7_3;
$L__BB7_3:
mov.u32 %r6, 0;
mov.b32 %r7, %r6;
bra.uni $L__BB7_4;
$L__BB7_4:
ld.u64 %rd36, [%SP+112];
ld.u64 %rd37, [%rd36+32];
mov.b64 %rd38, %rd37;
$L__tmp1137:
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i];
.loc 18 79 16
shl.b64 %rd39, %rd1, 3;
add.s64 %rd40, %rd38, %rd39;
ld.f64 %fd1, [%rd40];
// ---- r.x : second factor b.z[i]; %fd2 = a.y[i] * b.z[i] ----
// FIX(review): the echoed source line still reads b.y() here; see the header.
add.u64 %rd41, %SP, 280;
mov.b64 %rd42, %rd41;
st.u64 [%SP+0], %rd42;
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i];
.loc 18 79 27
bra.uni $L__tmp1138;
$L__tmp1138:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
// FIX(review): column changed from 8155 to 8282 to match the z() accessor sites.
.loc 11 36 8282
ld.u64 %rd43, [%SP+0];
setp.ne.s64 %p6, %rd43, 0;
not.pred %p7, %p6;
not.pred %p8, %p7;
@%p8 bra $L__BB7_6;
bra.uni $L__BB7_5;
$L__BB7_5:
mov.u32 %r8, 0;
mov.b32 %r9, %r8;
bra.uni $L__BB7_6;
$L__BB7_6:
ld.u64 %rd44, [%SP+0];
// FIX(review): offset changed from +32 (y column) to +40 (z column).
ld.u64 %rd45, [%rd44+40];
mov.b64 %rd46, %rd45;
$L__tmp1139:
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i];
.loc 18 79 27
shl.b64 %rd47, %rd1, 3;
add.s64 %rd48, %rd46, %rd47;
ld.f64 %fd13, [%rd48];
mul.f64 %fd2, %fd1, %fd13;
// ---- r.x : third factor a.z[i] -> %fd3 ----
add.u64 %rd49, %SP, 200;
mov.b64 %rd50, %rd49;
st.u64 [%SP+8], %rd50;
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i];
.loc 18 79 38
bra.uni $L__tmp1140;
$L__tmp1140:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 8282
ld.u64 %rd51, [%SP+8];
setp.ne.s64 %p9, %rd51, 0;
not.pred %p10, %p9;
not.pred %p11, %p10;
@%p11 bra $L__BB7_8;
bra.uni $L__BB7_7;
$L__BB7_7:
mov.u32 %r10, 0;
mov.b32 %r11, %r10;
bra.uni $L__BB7_8;
$L__BB7_8:
ld.u64 %rd52, [%SP+8];
ld.u64 %rd53, [%rd52+40];
mov.b64 %rd54, %rd53;
$L__tmp1141:
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i];
.loc 18 79 38
shl.b64 %rd55, %rd1, 3;
add.s64 %rd56, %rd54, %rd55;
ld.f64 %fd3, [%rd56];
// ---- r.x : fourth factor b.y[i]; %fd4 = %fd2 - a.z[i] * b.y[i] ----
add.u64 %rd57, %SP, 280;
mov.b64 %rd58, %rd57;
st.u64 [%SP+16], %rd58;
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i];
.loc 18 79 49
bra.uni $L__tmp1142;
$L__tmp1142:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 8155
ld.u64 %rd59, [%SP+16];
setp.ne.s64 %p12, %rd59, 0;
not.pred %p13, %p12;
not.pred %p14, %p13;
@%p14 bra $L__BB7_10;
bra.uni $L__BB7_9;
$L__BB7_9:
mov.u32 %r12, 0;
mov.b32 %r13, %r12;
bra.uni $L__BB7_10;
$L__BB7_10:
ld.u64 %rd60, [%SP+16];
ld.u64 %rd61, [%rd60+32];
mov.b64 %rd62, %rd61;
$L__tmp1143:
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i];
.loc 18 79 49
shl.b64 %rd63, %rd1, 3;
add.s64 %rd64, %rd62, %rd63;
ld.f64 %fd14, [%rd64];
mul.f64 %fd15, %fd3, %fd14;
sub.f64 %fd4, %fd2, %fd15;
// ---- store %fd4 to r.x[i] (column pointer at r+24) ----
add.u64 %rd65, %SP, 120;
mov.b64 %rd66, %rd65;
st.u64 [%SP+24], %rd66;
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i];
.loc 18 79 5
bra.uni $L__tmp1144;
$L__tmp1144:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 7158
ld.u64 %rd67, [%SP+24];
setp.ne.s64 %p15, %rd67, 0;
not.pred %p16, %p15;
not.pred %p17, %p16;
@%p17 bra $L__BB7_12;
bra.uni $L__BB7_11;
$L__BB7_11:
mov.u32 %r14, 0;
mov.b32 %r15, %r14;
bra.uni $L__BB7_12;
$L__BB7_12:
ld.u64 %rd68, [%SP+24];
ld.u64 %rd69, [%rd68+24];
mov.b64 %rd70, %rd69;
$L__tmp1145:
//test_v7_cuda.cu:79 r.x()[i] = a.y()[i] * b.y()[i] - a.z()[i] * b.y()[i];
.loc 18 79 5
shl.b64 %rd71, %rd1, 3;
add.s64 %rd72, %rd70, %rd71;
st.f64 [%rd72], %fd4;
// ---- r.y : first factor a.z[i] -> %fd5 ----
add.u64 %rd73, %SP, 200;
mov.b64 %rd74, %rd73;
st.u64 [%SP+32], %rd74;
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i];
.loc 18 80 16
bra.uni $L__tmp1146;
$L__tmp1146:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 8282
ld.u64 %rd75, [%SP+32];
setp.ne.s64 %p18, %rd75, 0;
not.pred %p19, %p18;
not.pred %p20, %p19;
@%p20 bra $L__BB7_14;
bra.uni $L__BB7_13;
$L__BB7_13:
mov.u32 %r16, 0;
mov.b32 %r17, %r16;
bra.uni $L__BB7_14;
$L__BB7_14:
ld.u64 %rd76, [%SP+32];
ld.u64 %rd77, [%rd76+40];
mov.b64 %rd78, %rd77;
$L__tmp1147:
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i];
.loc 18 80 16
shl.b64 %rd79, %rd1, 3;
add.s64 %rd80, %rd78, %rd79;
ld.f64 %fd5, [%rd80];
// ---- r.y : second factor b.x[i]; %fd6 = a.z[i] * b.x[i] ----
add.u64 %rd81, %SP, 280;
mov.b64 %rd82, %rd81;
st.u64 [%SP+40], %rd82;
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i];
.loc 18 80 27
bra.uni $L__tmp1148;
$L__tmp1148:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 8028
ld.u64 %rd83, [%SP+40];
setp.ne.s64 %p21, %rd83, 0;
not.pred %p22, %p21;
not.pred %p23, %p22;
@%p23 bra $L__BB7_16;
bra.uni $L__BB7_15;
$L__BB7_15:
mov.u32 %r18, 0;
mov.b32 %r19, %r18;
bra.uni $L__BB7_16;
$L__BB7_16:
ld.u64 %rd84, [%SP+40];
ld.u64 %rd85, [%rd84+24];
mov.b64 %rd86, %rd85;
$L__tmp1149:
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i];
.loc 18 80 27
shl.b64 %rd87, %rd1, 3;
add.s64 %rd88, %rd86, %rd87;
ld.f64 %fd16, [%rd88];
mul.f64 %fd6, %fd5, %fd16;
// ---- r.y : third factor a.x[i] -> %fd7 ----
add.u64 %rd89, %SP, 200;
mov.b64 %rd90, %rd89;
st.u64 [%SP+48], %rd90;
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i];
.loc 18 80 38
bra.uni $L__tmp1150;
$L__tmp1150:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 8028
ld.u64 %rd91, [%SP+48];
setp.ne.s64 %p24, %rd91, 0;
not.pred %p25, %p24;
not.pred %p26, %p25;
@%p26 bra $L__BB7_18;
bra.uni $L__BB7_17;
$L__BB7_17:
mov.u32 %r20, 0;
mov.b32 %r21, %r20;
bra.uni $L__BB7_18;
$L__BB7_18:
ld.u64 %rd92, [%SP+48];
ld.u64 %rd93, [%rd92+24];
mov.b64 %rd94, %rd93;
$L__tmp1151:
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i];
.loc 18 80 38
shl.b64 %rd95, %rd1, 3;
add.s64 %rd96, %rd94, %rd95;
ld.f64 %fd7, [%rd96];
// ---- r.y : fourth factor b.z[i]; %fd8 = %fd6 - a.x[i] * b.z[i] ----
add.u64 %rd97, %SP, 280;
mov.b64 %rd98, %rd97;
st.u64 [%SP+56], %rd98;
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i];
.loc 18 80 49
bra.uni $L__tmp1152;
$L__tmp1152:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 8282
ld.u64 %rd99, [%SP+56];
setp.ne.s64 %p27, %rd99, 0;
not.pred %p28, %p27;
not.pred %p29, %p28;
@%p29 bra $L__BB7_20;
bra.uni $L__BB7_19;
$L__BB7_19:
mov.u32 %r22, 0;
mov.b32 %r23, %r22;
bra.uni $L__BB7_20;
$L__BB7_20:
ld.u64 %rd100, [%SP+56];
ld.u64 %rd101, [%rd100+40];
mov.b64 %rd102, %rd101;
$L__tmp1153:
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i];
.loc 18 80 49
shl.b64 %rd103, %rd1, 3;
add.s64 %rd104, %rd102, %rd103;
ld.f64 %fd17, [%rd104];
mul.f64 %fd18, %fd7, %fd17;
sub.f64 %fd8, %fd6, %fd18;
// ---- store %fd8 to r.y[i] (column pointer at r+32) ----
add.u64 %rd105, %SP, 120;
mov.b64 %rd106, %rd105;
st.u64 [%SP+64], %rd106;
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i];
.loc 18 80 5
bra.uni $L__tmp1154;
$L__tmp1154:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 7273
ld.u64 %rd107, [%SP+64];
setp.ne.s64 %p30, %rd107, 0;
not.pred %p31, %p30;
not.pred %p32, %p31;
@%p32 bra $L__BB7_22;
bra.uni $L__BB7_21;
$L__BB7_21:
mov.u32 %r24, 0;
mov.b32 %r25, %r24;
bra.uni $L__BB7_22;
$L__BB7_22:
ld.u64 %rd108, [%SP+64];
ld.u64 %rd109, [%rd108+32];
mov.b64 %rd110, %rd109;
$L__tmp1155:
//test_v7_cuda.cu:80 r.y()[i] = a.z()[i] * b.x()[i] - a.x()[i] * b.z()[i];
.loc 18 80 5
shl.b64 %rd111, %rd1, 3;
add.s64 %rd112, %rd110, %rd111;
st.f64 [%rd112], %fd8;
// ---- r.z : first factor a.x[i] -> %fd9 ----
add.u64 %rd113, %SP, 200;
mov.b64 %rd114, %rd113;
st.u64 [%SP+72], %rd114;
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i];
.loc 18 81 16
bra.uni $L__tmp1156;
$L__tmp1156:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 8028
ld.u64 %rd115, [%SP+72];
setp.ne.s64 %p33, %rd115, 0;
not.pred %p34, %p33;
not.pred %p35, %p34;
@%p35 bra $L__BB7_24;
bra.uni $L__BB7_23;
$L__BB7_23:
mov.u32 %r26, 0;
mov.b32 %r27, %r26;
bra.uni $L__BB7_24;
$L__BB7_24:
ld.u64 %rd116, [%SP+72];
ld.u64 %rd117, [%rd116+24];
mov.b64 %rd118, %rd117;
$L__tmp1157:
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i];
.loc 18 81 16
shl.b64 %rd119, %rd1, 3;
add.s64 %rd120, %rd118, %rd119;
ld.f64 %fd9, [%rd120];
// ---- r.z : second factor b.y[i]; %fd10 = a.x[i] * b.y[i] ----
add.u64 %rd121, %SP, 280;
mov.b64 %rd122, %rd121;
st.u64 [%SP+80], %rd122;
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i];
.loc 18 81 27
bra.uni $L__tmp1158;
$L__tmp1158:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 8155
ld.u64 %rd123, [%SP+80];
setp.ne.s64 %p36, %rd123, 0;
not.pred %p37, %p36;
not.pred %p38, %p37;
@%p38 bra $L__BB7_26;
bra.uni $L__BB7_25;
$L__BB7_25:
mov.u32 %r28, 0;
mov.b32 %r29, %r28;
bra.uni $L__BB7_26;
$L__BB7_26:
ld.u64 %rd124, [%SP+80];
ld.u64 %rd125, [%rd124+32];
mov.b64 %rd126, %rd125;
$L__tmp1159:
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i];
.loc 18 81 27
shl.b64 %rd127, %rd1, 3;
add.s64 %rd128, %rd126, %rd127;
ld.f64 %fd19, [%rd128];
mul.f64 %fd10, %fd9, %fd19;
// ---- r.z : third factor a.y[i] -> %fd11 ----
add.u64 %rd129, %SP, 200;
mov.b64 %rd130, %rd129;
st.u64 [%SP+88], %rd130;
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i];
.loc 18 81 38
bra.uni $L__tmp1160;
$L__tmp1160:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 8155
ld.u64 %rd131, [%SP+88];
setp.ne.s64 %p39, %rd131, 0;
not.pred %p40, %p39;
not.pred %p41, %p40;
@%p41 bra $L__BB7_28;
bra.uni $L__BB7_27;
$L__BB7_27:
mov.u32 %r30, 0;
mov.b32 %r31, %r30;
bra.uni $L__BB7_28;
$L__BB7_28:
ld.u64 %rd132, [%SP+88];
ld.u64 %rd133, [%rd132+32];
mov.b64 %rd134, %rd133;
$L__tmp1161:
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i];
.loc 18 81 38
shl.b64 %rd135, %rd1, 3;
add.s64 %rd136, %rd134, %rd135;
ld.f64 %fd11, [%rd136];
// ---- r.z : fourth factor b.x[i]; %fd12 = %fd10 - a.y[i] * b.x[i] ----
add.u64 %rd137, %SP, 280;
mov.b64 %rd138, %rd137;
st.u64 [%SP+96], %rd138;
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i];
.loc 18 81 49
bra.uni $L__tmp1162;
$L__tmp1162:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 8028
ld.u64 %rd139, [%SP+96];
setp.ne.s64 %p42, %rd139, 0;
not.pred %p43, %p42;
not.pred %p44, %p43;
@%p44 bra $L__BB7_30;
bra.uni $L__BB7_29;
$L__BB7_29:
mov.u32 %r32, 0;
mov.b32 %r33, %r32;
bra.uni $L__BB7_30;
$L__BB7_30:
ld.u64 %rd140, [%SP+96];
ld.u64 %rd141, [%rd140+24];
mov.b64 %rd142, %rd141;
$L__tmp1163:
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i];
.loc 18 81 49
shl.b64 %rd143, %rd1, 3;
add.s64 %rd144, %rd142, %rd143;
ld.f64 %fd20, [%rd144];
mul.f64 %fd21, %fd11, %fd20;
sub.f64 %fd12, %fd10, %fd21;
// ---- store %fd12 to r.z[i] (column pointer at r+40) ----
add.u64 %rd145, %SP, 120;
mov.b64 %rd146, %rd145;
st.u64 [%SP+104], %rd146;
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i];
.loc 18 81 5
bra.uni $L__tmp1164;
$L__tmp1164:
//soa_v7_cuda.h:36 declare_SoA_template(SoA,
.loc 11 36 7388
ld.u64 %rd147, [%SP+104];
setp.ne.s64 %p45, %rd147, 0;
not.pred %p46, %p45;
not.pred %p47, %p46;
@%p47 bra $L__BB7_32;
bra.uni $L__BB7_31;
$L__BB7_31:
mov.u32 %r34, 0;
mov.b32 %r35, %r34;
bra.uni $L__BB7_32;
$L__BB7_32:
ld.u64 %rd148, [%SP+104];
ld.u64 %rd149, [%rd148+40];
mov.b64 %rd150, %rd149;
$L__tmp1165:
//test_v7_cuda.cu:81 r.z()[i] = a.x()[i] * b.y()[i] - a.y()[i] * b.x()[i];
.loc 18 81 5
shl.b64 %rd151, %rd1, 3;
add.s64 %rd152, %rd150, %rd151;
st.f64 [%rd152], %fd12;
//test_v7_cuda.cu:82 }
.loc 18 82 3
bra.uni $L__BB7_33;
$L__BB7_33:
ret;
$L__tmp1166:
$L__func_end7:
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment