Skip to content

Instantly share code, notes, and snippets.

@markdewing
Last active October 13, 2023 20:45
Show Gist options
  • Save markdewing/12143bb6679c977a5191280fc909f31e to your computer and use it in GitHub Desktop.
Save markdewing/12143bb6679c977a5191280fc909f31e to your computer and use it in GitHub Desktop.
PTX and SASS output for the assignment-after-reduction OpenMP offload bug. Look for the variable ratioGradRef_local.
// .weak __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 // -- Begin function __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783
.weak .entry __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783(
.param .u64 __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_0,
.param .u64 __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_1,
.param .u64 __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_2,
.param .u64 __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_3,
.param .u64 __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_4,
.param .u64 __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_5,
.param .u64 __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_6,
.param .u64 __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_7,
.param .u64 __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_8,
.param .u64 __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_9,
.param .u64 __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_10,
.param .u64 __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_11
)
.maxntid 128, 1, 1 // @__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783
{
.local .align 8 .b8 __local_depot4[24];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<210>;
.reg .b16 %rs<13>;
.reg .b32 %r<492>;
.reg .b64 %rd<524>;
.reg .f64 %fd<294>;
.loc 1 783 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:0
$L__func_begin4:
.loc 1 783 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:0
// demoted variable
.shared .align 16 .u64 NumPtcls1_shared2_$_0;
// demoted variable
.shared .align 16 .u64 confgListOccup_ptr2_shared3_$_0;
// demoted variable
.shared .align 16 .u64 psiV_temp_list_ptr3_shared4_$_0;
// demoted variable
.shared .align 16 .u64 iw_shared5_$_0;
// demoted variable
.shared .align 16 .u64 psiV_list_devptr4_shared6_$_0;
// demoted variable
.shared .align 16 .b8 ratioGradRef_local_shared[24];
// demoted variable
.shared .align 16 .u64 psiMinv_temp_list_devptr5_shared_$_0;
// demoted variable
.shared .align 16 .u64 psiMinv_cols6_shared_$_0;
// demoted variable
.shared .align 16 .u32 WorkingIndex7_shared7_$_0;
// demoted variable
.shared .align 16 .u64 dpsiV_list_ptr8_shared_$_0;
// demoted variable
.shared .align 16 .f64 c_ratio_shared8_$_0;
// %bb.0: // %entry
mov.u64 %SPL, __local_depot4;
$L__tmp116:
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
mov.u32 %r1, %tid.x;
setp.ne.s32 %p6, %r1, 0;
mov.u32 %r471, 0;
@%p6 bra $L__BB4_2;
// %bb.1: // %if.then.i.i
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u32 %r95, 1;
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
st.shared.u32 [IsSPMDMode], %r95;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r471;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r471;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r95;
$L__BB4_2: // %__kmpc_target_init.exit
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
ld.param.u64 %rd213, [__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_0];
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
bar.sync 0;
bar.sync 0;
$L__tmp117:
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
@%p6 bra $L__BB4_4;
// %bb.3: // %region.guarded27.i
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
ld.param.u64 %rd221, [__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_8];
ld.param.u64 %rd220, [__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_7];
ld.param.u64 %rd219, [__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_6];
ld.param.u64 %rd218, [__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_5];
ld.param.u64 %rd217, [__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_4];
ld.param.u64 %rd216, [__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_3];
ld.param.u64 %rd215, [__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_2];
ld.param.u64 %rd214, [__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_1];
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
st.shared.u64 [NumPtcls1_shared2_$_0], %rd214;
st.shared.u64 [confgListOccup_ptr2_shared3_$_0], %rd215;
st.shared.u64 [psiV_temp_list_ptr3_shared4_$_0], %rd216;
st.shared.u64 [psiV_list_devptr4_shared6_$_0], %rd217;
st.shared.u64 [psiMinv_temp_list_devptr5_shared_$_0], %rd218;
st.shared.u64 [psiMinv_cols6_shared_$_0], %rd219;
st.shared.u32 [WorkingIndex7_shared7_$_0], %rd220;
st.shared.u64 [dpsiV_list_ptr8_shared_$_0], %rd221;
$L__BB4_4: // %region.barrier25.i
bar.sync 0;
.loc 1 785 30 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:30
shl.b64 %rd6, %rd213, 32;
.loc 1 785 34 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:34
@%p6 bra $L__BB4_6;
// %bb.5: // %region.guarded32.i
.loc 1 0 34 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:34
mov.u64 %rd227, 0;
.loc 1 785 34 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:34
st.shared.u64 [iw_shared5_$_0], %rd227;
$L__BB4_6: // %region.barrier30.i
bar.sync 0;
.loc 1 785 5 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:5
setp.eq.s64 %p9, %rd6, 0;
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
@%p9 bra $L__BB4_232;
// %bb.7: // %omp.precond.then.i
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
cvt.s64.s32 %rd7, %rd213;
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
mov.u32 %r97, %ctaid.x;
mov.u32 %r98, %nctaid.x;
cvt.s64.s32 %rd9, %r97;
cvt.s64.s32 %rd10, %r98;
or.b64 %rd228, %rd7, %rd10;
and.b64 %rd229, %rd228, -4294967296;
setp.ne.s64 %p10, %rd229, 0;
@%p10 bra $L__BB4_9;
bra.uni $L__BB4_8;
$L__BB4_9:
div.u64 %rd458, %rd7, %rd10;
bra.uni $L__BB4_10;
$L__BB4_8:
cvt.u32.u64 %r99, %rd10;
cvt.u32.u64 %r100, %rd7;
div.u32 %r101, %r100, %r99;
cvt.u64.u32 %rd458, %r101;
$L__BB4_10:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
add.s64 %rd8, %rd7, -1;
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
mul.lo.s64 %rd230, %rd458, %rd10;
sub.s64 %rd14, %rd7, %rd230;
setp.le.u64 %p11, %rd14, %rd9;
@%p11 bra $L__BB4_12;
// %bb.11: // %if.then.i105.i.i.i
add.s64 %rd232, %rd458, 1;
mul.lo.s64 %rd461, %rd232, %rd9;
bra.uni $L__BB4_13;
$L__BB4_12: // %if.else.i94.i.i.i
mul.lo.s64 %rd231, %rd458, %rd9;
add.s64 %rd461, %rd14, %rd231;
add.s64 %rd458, %rd458, -1;
$L__BB4_13: // %__kmpc_distribute_static_init_8u.exit.i
add.s64 %rd233, %rd461, %rd458;
.loc 1 785 10 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:10
min.u64 %rd234, %rd233, %rd8;
add.s64 %rd20, %rd234, 1;
.loc 1 785 5 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:5
setp.ge.u64 %p12, %rd461, %rd20;
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
@%p12 bra $L__BB4_232;
// %bb.14: // %region.check.tid.lr.ph.i
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
ld.param.u64 %rd222, [__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_11];
ld.param.u64 %rd223, [__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_10];
ld.param.u64 %rd224, [__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_9];
cvta.to.global.u64 %rd1, %rd222;
cvta.to.global.u64 %rd2, %rd223;
cvta.to.global.u64 %rd3, %rd224;
add.u64 %rd4, %SPL, 0;
mov.u32 %r470, 1;
mov.u32 %r2, %ntid.x;
add.s32 %r102, %r2, -1;
and.b32 %r3, %r102, -32;
shr.u32 %r4, %r1, 5;
cvt.u16.u32 %rs1, %r1;
and.b16 %rs2, %rs1, 31;
and.b32 %r5, %r1, 31;
shr.s32 %r103, %r1, 5;
add.s32 %r6, %r2, -32;
mul.wide.s32 %rd235, %r103, 4;
mov.u64 %rd236, __openmp_nvptx_data_transfer_temporary_storage;
add.s64 %rd21, %rd236, %rd235;
mul.wide.s32 %rd237, %r1, 4;
add.s64 %rd22, %rd236, %rd237;
mov.f64 %fd143, 0d0000000000000000;
setp.ne.s32 %p82, %r1, %r3;
setp.ne.s32 %p87, %r5, 0;
mov.pred %p169, 0;
bra.uni $L__BB4_15;
$L__BB4_231: // %region.barrier20.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 805 33 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:33
bar.sync 0;
.loc 1 785 5 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:5
add.s64 %rd461, %rd461, 1;
setp.lt.u64 %p207, %rd461, %rd20;
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
@%p207 bra $L__BB4_15;
bra.uni $L__BB4_232;
$L__BB4_15: // %region.check.tid.i
// =>This Loop Header: Depth=1
// Child Loop BB4_33 Depth 2
// Child Loop BB4_34 Depth 3
// Child Loop BB4_44 Depth 2
// Child Loop BB4_50 Depth 2
// Child Loop BB4_67 Depth 2
// Child Loop BB4_81 Depth 2
// Child Loop BB4_82 Depth 3
// Child Loop BB4_89 Depth 2
// Child Loop BB4_95 Depth 2
// Child Loop BB4_112 Depth 2
// Child Loop BB4_133 Depth 2
// Child Loop BB4_134 Depth 3
// Child Loop BB4_144 Depth 2
// Child Loop BB4_176 Depth 2
// Child Loop BB4_187 Depth 2
// Child Loop BB4_188 Depth 3
// Child Loop BB4_195 Depth 2
// Child Loop BB4_222 Depth 2
.loc 1 785 34 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:34
@%p6 bra $L__BB4_17;
// %bb.16: // %region.guarded.i
// in Loop: Header=BB4_15 Depth=1
st.shared.u64 [iw_shared5_$_0], %rd461;
$L__BB4_17: // %region.barrier.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
$L__tmp118:
.loc 2 83 12 // qmcpack/src/Containers/OhmmsPETE/TinyVector.h:83:12
@%p6 bra $L__BB4_19;
// %bb.18: // %region.guarded.i.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 2 0 12 // qmcpack/src/Containers/OhmmsPETE/TinyVector.h:0:12
mov.u64 %rd238, 0;
.loc 2 83 12 // qmcpack/src/Containers/OhmmsPETE/TinyVector.h:83:12
st.shared.u64 [ratioGradRef_local_shared], %rd238;
$L__BB4_19: // %region.barrier.i.i.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
@%p6 bra $L__BB4_21;
// %bb.20: // %region.guarded.1.i.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 2 0 12 // qmcpack/src/Containers/OhmmsPETE/TinyVector.h:0:12
mov.u64 %rd239, 0;
.loc 2 83 12 // qmcpack/src/Containers/OhmmsPETE/TinyVector.h:83:12
st.shared.u64 [ratioGradRef_local_shared+8], %rd239;
$L__BB4_21: // %region.barrier.1.i.i.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
@%p6 bra $L__BB4_23;
// %bb.22: // %region.guarded.2.i.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 2 0 12 // qmcpack/src/Containers/OhmmsPETE/TinyVector.h:0:12
mov.u64 %rd240, 0;
.loc 2 83 12 // qmcpack/src/Containers/OhmmsPETE/TinyVector.h:83:12
st.shared.u64 [ratioGradRef_local_shared+16], %rd240;
$L__BB4_23: // %_ZN11qmcplusplus10TinyVectorIdLj3EEC1ERKd.internalized.exit.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
$L__tmp119:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
ld.shared.u32 %r104, [IsSPMDMode];
setp.eq.s32 %p17, %r104, 0;
selp.b32 %r105, -32, 0, %p17;
add.s32 %r106, %r105, %r2;
setp.lt.u32 %p18, %r106, 32;
and.b32 %r107, %r106, -32;
selp.b32 %r7, 1, %r107, %p18;
selp.b32 %r108, %r7, %r2, %p17;
setp.eq.s32 %p19, %r108, %r106;
selp.b32 %r8, 0, %r108, %p19;
@%p17 bra $L__BB4_77;
$L__tmp120:
// %bb.24: // %if.then32.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
setp.ne.s32 %p20, %r1, 0;
$L__tmp121:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
bar.sync 0;
@%p20 bra $L__BB4_26;
// %bb.25: // %if.else.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r8;
mov.u32 %r109, 1;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r109;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r109;
$L__BB4_26: // %_ZN4ompx5state9ValueRAIIINS0_5ValueIjLNS0_9ValueKindE1EEEjEC1ERS4_jjbP7IdentTyb.exit.i.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
bar.sync 0;
add.s32 %r110, %r8, -1;
setp.lt.u32 %p21, %r110, %r1;
@%p21 bra $L__BB4_74;
// %bb.27: // %if.then40.i.i
// in Loop: Header=BB4_15 Depth=1
$L__tmp122:
.loc 1 789 30 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:30
ld.shared.u64 %rd241, [NumPtcls1_shared2_$_0];
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
setp.eq.s64 %p22, %rd241, 0;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p22 bra $L__BB4_74;
// %bb.28: // %omp.precond.then.i986.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r9, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p23, %r9, 1;
mov.u32 %r472, %r470;
@%p23 bra $L__BB4_31;
// %bb.29: // %cond.false.i.i.i1077.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r472, [_ZN4ompx5state9TeamStateE_$_2];
setp.ne.s32 %p24, %r472, 0;
@%p24 bra $L__BB4_31;
// %bb.30: // %cond.false.i.i.i.i1079.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r112, [IsSPMDMode];
setp.eq.s32 %p25, %r112, 0;
selp.b32 %r113, -32, 0, %p25;
add.s32 %r472, %r113, %r2;
$L__BB4_31: // %__kmpc_for_static_init_8u.exit.i988.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
add.s64 %rd24, %rd241, -1;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
ld.shared.u32 %r10, [_ZN4ompx5state9TeamStateE_$_1];
setp.eq.s32 %p26, %r9, 0;
setp.eq.s32 %p27, %r10, %r9;
selp.b32 %r114, %r1, 0, %p27;
selp.b32 %r115, 0, %r114, %p26;
cvt.s64.s32 %rd466, %r115;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
setp.lt.u64 %p28, %rd24, %rd466;
mov.f64 %fd248, 0d0000000000000000;
mov.f64 %fd247, %fd248;
mov.f64 %fd246, %fd248;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p28 bra $L__BB4_36;
// %bb.32: // %omp.inner.for.cond.preheader.lr.ph.i996.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
cvt.s64.s32 %rd26, %r472;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
add.s64 %rd465, %rd466, 1;
$L__tmp123:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
ld.shared.u64 %rd242, [confgListOccup_ptr2_shared3_$_0];
ld.shared.u64 %rd243, [psiV_list_devptr4_shared6_$_0];
ld.shared.u64 %rd244, [iw_shared5_$_0];
shl.b64 %rd245, %rd244, 3;
add.s64 %rd246, %rd243, %rd245;
ld.u64 %rd28, [%rd246];
ld.shared.u64 %rd247, [psiV_temp_list_ptr3_shared4_$_0];
add.s64 %rd248, %rd247, %rd245;
ld.u64 %rd249, [%rd248];
ld.shared.u64 %rd250, [psiMinv_temp_list_devptr5_shared_$_0];
add.s64 %rd251, %rd250, %rd245;
ld.u64 %rd252, [%rd251];
ld.shared.u64 %rd253, [psiMinv_cols6_shared_$_0];
ld.shared.s32 %rd254, [WorkingIndex7_shared7_$_0];
ld.shared.u64 %rd255, [dpsiV_list_ptr8_shared_$_0];
add.s64 %rd256, %rd255, %rd245;
ld.u64 %rd29, [%rd256];
$L__tmp124:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
mul.lo.s64 %rd257, %rd253, %rd466;
add.s64 %rd258, %rd257, %rd254;
shl.b64 %rd259, %rd258, 3;
add.s64 %rd464, %rd252, %rd259;
shl.b64 %rd31, %rd253, 3;
mul.lo.s64 %rd32, %rd31, %rd26;
shl.b64 %rd260, %rd466, 3;
add.s64 %rd463, %rd249, %rd260;
shl.b64 %rd34, %rd26, 3;
add.s64 %rd462, %rd242, %rd260;
mov.f64 %fd246, 0d0000000000000000;
mov.u64 %rd467, %rd466;
mov.f64 %fd247, %fd246;
mov.f64 %fd248, %fd246;
$L__BB4_33: // %omp.inner.for.cond.preheader.i1002.i.i
// Parent Loop BB4_15 Depth=1
// => This Loop Header: Depth=2
// Child Loop BB4_34 Depth 3
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u64 %rd468, %rd462;
mov.u64 %rd469, %rd463;
mov.u64 %rd470, %rd464;
mov.u64 %rd471, %rd467;
$L__BB4_34: // %omp.inner.for.body.i1003.i.i
// Parent Loop BB4_15 Depth=1
// Parent Loop BB4_33 Depth=2
// => This Inner Loop Header: Depth=3
.loc 1 791 26 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:791:26
ld.u64 %rd261, [%rd468];
.loc 1 792 37 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:792:37
shl.b64 %rd262, %rd261, 3;
add.s64 %rd263, %rd28, %rd262;
ld.f64 %fd86, [%rd263];
.loc 1 792 35 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:792:35
st.f64 [%rd469], %fd86;
.loc 1 793 95 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:793:95
mul.lo.s64 %rd264, %rd261, 24;
add.s64 %rd265, %rd29, %rd264;
.loc 1 793 93 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:793:93
ld.f64 %fd87, [%rd470];
$L__tmp125:
.loc 3 297 34 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:297:34
ld.f64 %fd88, [%rd265];
.loc 3 297 51 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:297:51
ld.f64 %fd89, [%rd265+8];
.loc 3 297 68 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:297:68
ld.f64 %fd90, [%rd265+16];
$L__tmp126:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
fma.rn.f64 %fd246, %fd88, %fd87, %fd246;
$L__tmp127:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
fma.rn.f64 %fd247, %fd89, %fd87, %fd247;
$L__tmp128:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
fma.rn.f64 %fd248, %fd90, %fd87, %fd248;
$L__tmp129:
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
add.s64 %rd471, %rd471, 1;
add.s64 %rd470, %rd470, %rd31;
add.s64 %rd469, %rd469, 8;
add.s64 %rd468, %rd468, 8;
setp.lt.u64 %p29, %rd471, %rd465;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p29 bra $L__BB4_34;
// %bb.35: // %omp.dispatch.inc.i1012.i.i
// in Loop: Header=BB4_33 Depth=2
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
add.s64 %rd467, %rd467, %rd26;
add.s64 %rd266, %rd466, %rd26;
.loc 1 789 12 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:12
min.u64 %rd466, %rd266, %rd24;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
add.s64 %rd465, %rd466, 1;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
add.s64 %rd464, %rd464, %rd32;
add.s64 %rd463, %rd463, %rd34;
add.s64 %rd462, %rd462, %rd34;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
setp.lt.u64 %p30, %rd467, %rd465;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p30 bra $L__BB4_33;
$L__tmp130:
$L__BB4_36: // %omp.dispatch.end.i1017.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
st.local.f64 [%rd4], %fd246;
st.local.f64 [%rd4+8], %fd247;
st.local.f64 [%rd4+16], %fd248;
mov.u32 %r473, 1;
$L__tmp131:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p23 bra $L__BB4_39;
// %bb.37: // %cond.false.i.i19.i.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r473, [_ZN4ompx5state9TeamStateE_$_2];
setp.ne.s32 %p34, %r473, 0;
@%p34 bra $L__BB4_39;
// %bb.38: // %cond.false.i.i.i20.i.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r118, [IsSPMDMode];
setp.eq.s32 %p35, %r118, 0;
selp.b32 %r119, -32, 0, %p35;
add.s32 %r473, %r119, %r2;
$L__BB4_39: // %omp_get_num_threads.exit.i.i1023.i.i
// in Loop: Header=BB4_15 Depth=1
setp.eq.s32 %p36, %r473, 1;
@%p36 bra $L__BB4_73;
// %bb.40: // %if.end5.i.i.i1025.i.i
// in Loop: Header=BB4_15 Depth=1
mov.b64 %rd474, %fd248;
mov.b64 %rd473, %fd246;
mov.b64 %rd472, %fd247;
add.s32 %r18, %r473, 31;
shr.u32 %r477, %r18, 5;
and.b32 %r475, %r473, 31;
setp.ne.s32 %p37, %r475, 0;
add.s32 %r120, %r477, -1;
setp.ge.u32 %p38, %r4, %r120;
and.pred %p39, %p37, %p38;
@%p39 bra $L__BB4_42;
// %bb.41: // %for.body.i.i.preheader.i.i1070.i.i
// in Loop: Header=BB4_15 Depth=1
$L__tmp132:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// begin inline asm
mov.b64 {%r134,%r135}, %rd473;
// end inline asm
shfl.sync.down.b32 %r137, %r135, 16, 31, -1;
shfl.sync.down.b32 %r136, %r134, 16, 31, -1;
// begin inline asm
mov.b64 %rd274, {%r136,%r137};
// end inline asm
// begin inline asm
mov.b64 {%r138,%r139}, %rd472;
// end inline asm
shfl.sync.down.b32 %r141, %r139, 16, 31, -1;
shfl.sync.down.b32 %r140, %r138, 16, 31, -1;
// begin inline asm
mov.b64 %rd276, {%r140,%r141};
// end inline asm
// begin inline asm
mov.b64 {%r142,%r143}, %rd474;
// end inline asm
shfl.sync.down.b32 %r145, %r143, 16, 31, -1;
shfl.sync.down.b32 %r144, %r142, 16, 31, -1;
// begin inline asm
mov.b64 %rd278, {%r144,%r145};
// end inline asm
mov.b64 %fd100, %rd276;
mov.b64 %fd101, %rd274;
$L__tmp133:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd102, %fd246, %fd101;
$L__tmp134:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd103, %fd247, %fd100;
$L__tmp135:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %fd104, %rd278;
$L__tmp136:
add.f64 %fd105, %fd248, %fd104;
$L__tmp137:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd279, %fd102;
// begin inline asm
mov.b64 {%r146,%r147}, %rd279;
// end inline asm
shfl.sync.down.b32 %r149, %r147, 8, 31, -1;
shfl.sync.down.b32 %r148, %r146, 8, 31, -1;
// begin inline asm
mov.b64 %rd280, {%r148,%r149};
// end inline asm
mov.b64 %rd281, %fd103;
// begin inline asm
mov.b64 {%r150,%r151}, %rd281;
// end inline asm
shfl.sync.down.b32 %r153, %r151, 8, 31, -1;
shfl.sync.down.b32 %r152, %r150, 8, 31, -1;
// begin inline asm
mov.b64 %rd282, {%r152,%r153};
// end inline asm
mov.b64 %rd283, %fd105;
// begin inline asm
mov.b64 {%r154,%r155}, %rd283;
// end inline asm
shfl.sync.down.b32 %r157, %r155, 8, 31, -1;
shfl.sync.down.b32 %r156, %r154, 8, 31, -1;
// begin inline asm
mov.b64 %rd284, {%r156,%r157};
// end inline asm
mov.b64 %fd106, %rd284;
mov.b64 %fd107, %rd282;
mov.b64 %fd108, %rd280;
$L__tmp138:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd109, %fd102, %fd108;
$L__tmp139:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd110, %fd103, %fd107;
$L__tmp140:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd111, %fd105, %fd106;
$L__tmp141:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd285, %fd109;
// begin inline asm
mov.b64 {%r158,%r159}, %rd285;
// end inline asm
shfl.sync.down.b32 %r161, %r159, 4, 31, -1;
shfl.sync.down.b32 %r160, %r158, 4, 31, -1;
// begin inline asm
mov.b64 %rd286, {%r160,%r161};
// end inline asm
mov.b64 %rd287, %fd110;
// begin inline asm
mov.b64 {%r162,%r163}, %rd287;
// end inline asm
shfl.sync.down.b32 %r165, %r163, 4, 31, -1;
shfl.sync.down.b32 %r164, %r162, 4, 31, -1;
// begin inline asm
mov.b64 %rd288, {%r164,%r165};
// end inline asm
mov.b64 %rd289, %fd111;
// begin inline asm
mov.b64 {%r166,%r167}, %rd289;
// end inline asm
shfl.sync.down.b32 %r169, %r167, 4, 31, -1;
shfl.sync.down.b32 %r168, %r166, 4, 31, -1;
// begin inline asm
mov.b64 %rd290, {%r168,%r169};
// end inline asm
mov.b64 %fd112, %rd290;
mov.b64 %fd113, %rd288;
mov.b64 %fd114, %rd286;
$L__tmp142:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd115, %fd109, %fd114;
$L__tmp143:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd116, %fd110, %fd113;
$L__tmp144:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd117, %fd111, %fd112;
$L__tmp145:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd291, %fd115;
// begin inline asm
mov.b64 {%r170,%r171}, %rd291;
// end inline asm
shfl.sync.down.b32 %r173, %r171, 2, 31, -1;
shfl.sync.down.b32 %r172, %r170, 2, 31, -1;
// begin inline asm
mov.b64 %rd292, {%r172,%r173};
// end inline asm
mov.b64 %rd293, %fd116;
// begin inline asm
mov.b64 {%r174,%r175}, %rd293;
// end inline asm
shfl.sync.down.b32 %r177, %r175, 2, 31, -1;
shfl.sync.down.b32 %r176, %r174, 2, 31, -1;
// begin inline asm
mov.b64 %rd294, {%r176,%r177};
// end inline asm
mov.b64 %rd295, %fd117;
// begin inline asm
mov.b64 {%r178,%r179}, %rd295;
// end inline asm
shfl.sync.down.b32 %r181, %r179, 2, 31, -1;
shfl.sync.down.b32 %r180, %r178, 2, 31, -1;
// begin inline asm
mov.b64 %rd296, {%r180,%r181};
// end inline asm
mov.b64 %fd118, %rd296;
mov.b64 %fd119, %rd294;
mov.b64 %fd120, %rd292;
$L__tmp146:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd121, %fd115, %fd120;
$L__tmp147:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd122, %fd116, %fd119;
$L__tmp148:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd123, %fd117, %fd118;
$L__tmp149:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd297, %fd121;
// begin inline asm
mov.b64 {%r182,%r183}, %rd297;
// end inline asm
shfl.sync.down.b32 %r185, %r183, 1, 31, -1;
shfl.sync.down.b32 %r184, %r182, 1, 31, -1;
// begin inline asm
mov.b64 %rd298, {%r184,%r185};
// end inline asm
mov.b64 %rd299, %fd122;
// begin inline asm
mov.b64 {%r186,%r187}, %rd299;
// end inline asm
shfl.sync.down.b32 %r189, %r187, 1, 31, -1;
shfl.sync.down.b32 %r188, %r186, 1, 31, -1;
// begin inline asm
mov.b64 %rd300, {%r188,%r189};
// end inline asm
mov.b64 %rd301, %fd123;
// begin inline asm
mov.b64 {%r190,%r191}, %rd301;
// end inline asm
shfl.sync.down.b32 %r193, %r191, 1, 31, -1;
shfl.sync.down.b32 %r192, %r190, 1, 31, -1;
// begin inline asm
mov.b64 %rd302, {%r192,%r193};
// end inline asm
mov.b64 %fd124, %rd302;
mov.b64 %fd125, %rd300;
mov.b64 %fd126, %rd298;
$L__tmp150:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd246, %fd121, %fd126;
st.local.f64 [%rd4], %fd246;
$L__tmp151:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd247, %fd122, %fd125;
st.local.f64 [%rd4+8], %fd247;
$L__tmp152:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd127, %fd123, %fd124;
st.local.f64 [%rd4+16], %fd127;
$L__tmp153:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
bra.uni $L__BB4_48;
$L__tmp154:
// Two-way dispatch on the unsigned comparison %r7 > 1 (%r7 is defined
// outside this excerpt): taken -> BB4_120, otherwise -> BB4_78.
$L__BB4_77: // %if.end45.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.gt.u32 %p66, %r7, 1;
@%p66 bra $L__BB4_120;
bra.uni $L__BB4_78;
// %r7 > 1 path: write the three shared TeamState words (_$_2 = %r8,
// _$_1 = 1, _$_0 = 1), execute barrier.sync on barrier %r333 (= 8) twice with
// no thread-count operand, then rewrite the words as _$_0 = _$_1 = %r471 and
// _$_2 = 1 before continuing at BB4_121. %r8 and %r471 come from outside
// this excerpt.
$L__BB4_120: // %__kmpc_begin_sharing_variables.exit.i.i
// in Loop: Header=BB4_15 Depth=1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r8;
mov.u32 %r335, 1;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r335;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r335;
mov.u32 %r333, 8;
// begin inline asm
barrier.sync %r333;
// end inline asm
// begin inline asm
barrier.sync %r333;
// end inline asm
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r471;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r471;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r335;
bra.uni $L__BB4_121;
// Preamble of the loop that debug info attributes to lines 788-789 of
// MultiDiracDeterminant.2.cpp. Sets TeamState _$_0 to 1, reads the trip
// count from NumPtcls1_shared2 and skips everything (-> BB4_119) when it is 0.
$L__BB4_78: // %if.else.i.i4468.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u32 %r223, 1;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r223;
$L__tmp155:
.loc 1 789 30 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:30
ld.shared.u64 %rd310, [NumPtcls1_shared2_$_0];
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
setp.eq.s64 %p67, %rd310, 0;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p67 bra $L__BB4_119;
// %bb.79: // %omp.precond.then.i6642.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
// %rd89 = trip count - 1 (last valid index).
add.s64 %rd89, %rd310, -1;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// %rd91  = sext((TeamState_$_2 == 0) ? %r6 : TeamState_$_2)  -> loop stride
// %rd489 = sext((TeamState_$_1 == 1) ? %r1 : 0)              -> first index
// %r1 / %r6 are defined outside this excerpt (presumably thread id and
// block size - TODO confirm).
ld.shared.u32 %r224, [_ZN4ompx5state9TeamStateE_$_1];
ld.shared.u32 %r225, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p68, %r225, 0;
selp.b32 %r226, %r6, %r225, %p68;
setp.eq.s32 %p69, %r224, 1;
selp.b32 %r227, %r1, 0, %p69;
cvt.s64.s32 %rd489, %r227;
cvt.s64.s32 %rd91, %r226;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// If the first index is already past the last valid index (unsigned
// compare) there is no work: seed the three accumulators from %fd143
// (defined outside this excerpt) and jump to the post-loop block.
setp.lt.u64 %p70, %rd89, %rd489;
mov.f64 %fd264, %fd143;
mov.f64 %fd263, %fd143;
mov.f64 %fd262, %fd143;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p70 bra $L__BB4_84;
// %bb.80: // %omp.inner.for.cond.preheader.lr.ph.i6655.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// %rd488 = exclusive upper bound of the current chunk (first index + 1).
add.s64 %rd488, %rd489, 1;
$L__tmp156:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// Fetch the loop-invariant operands from shared memory. The four *_list_*
// values are tables of 64-bit pointers; each is indexed by iw (byte offset
// %rd314 = iw * 8) and dereferenced with a generic-address load:
//   %rd311 = confgListOccup_ptr          %rd93  = psiV_list_devptr[iw]
//   %rd318 = psiV_temp_list_ptr[iw]      %rd321 = psiMinv_temp_list_devptr[iw]
//   %rd322 = psiMinv_cols                %rd323 = sext(WorkingIndex, 32-bit)
//   %rd94  = dpsiV_list_ptr[iw]
ld.shared.u64 %rd311, [confgListOccup_ptr2_shared3_$_0];
ld.shared.u64 %rd312, [psiV_list_devptr4_shared6_$_0];
ld.shared.u64 %rd313, [iw_shared5_$_0];
shl.b64 %rd314, %rd313, 3;
add.s64 %rd315, %rd312, %rd314;
ld.u64 %rd93, [%rd315];
ld.shared.u64 %rd316, [psiV_temp_list_ptr3_shared4_$_0];
add.s64 %rd317, %rd316, %rd314;
ld.u64 %rd318, [%rd317];
ld.shared.u64 %rd319, [psiMinv_temp_list_devptr5_shared_$_0];
add.s64 %rd320, %rd319, %rd314;
ld.u64 %rd321, [%rd320];
ld.shared.u64 %rd322, [psiMinv_cols6_shared_$_0];
ld.shared.s32 %rd323, [WorkingIndex7_shared7_$_0];
ld.shared.u64 %rd324, [dpsiV_list_ptr8_shared_$_0];
add.s64 %rd325, %rd324, %rd314;
ld.u64 %rd94, [%rd325];
$L__tmp157:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// Running pointers for the first index i = %rd489 (8-byte elements):
//   %rd487 = &psiMinv_temp[i * psiMinv_cols + WorkingIndex]
//   %rd486 = &psiV_temp[i]
//   %rd485 = &confgListOccup[i]
// and their per-chunk byte strides:
//   %rd96 = psiMinv_cols * 8   %rd97 = %rd96 * stride   %rd99 = stride * 8
mul.lo.s64 %rd326, %rd322, %rd489;
add.s64 %rd327, %rd326, %rd323;
shl.b64 %rd328, %rd327, 3;
add.s64 %rd487, %rd321, %rd328;
shl.b64 %rd96, %rd322, 3;
mul.lo.s64 %rd97, %rd96, %rd91;
shl.b64 %rd329, %rd489, 3;
add.s64 %rd486, %rd318, %rd329;
shl.b64 %rd99, %rd91, 3;
add.s64 %rd485, %rd311, %rd329;
// Zero the three f64 accumulators and start the chunk counter at the
// first index.
mov.f64 %fd262, 0d0000000000000000;
mov.u64 %rd490, %rd489;
mov.f64 %fd263, %fd262;
mov.f64 %fd264, %fd262;
// Outer (chunk) loop header: copy the chunk-level cursors into the
// inner-loop working registers.
//   %rd491 <- %rd485 (index-list cursor)   %rd492 <- %rd486 (output cursor)
//   %rd493 <- %rd487 (matrix cursor)       %rd494 <- %rd490 (iteration counter)
$L__BB4_81: // %omp.inner.for.cond.preheader.i6662.i.i
// Parent Loop BB4_15 Depth=1
// => This Loop Header: Depth=2
// Child Loop BB4_82 Depth 3
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u64 %rd491, %rd485;
mov.u64 %rd492, %rd486;
mov.u64 %rd493, %rd487;
mov.u64 %rd494, %rd490;
// Inner loop body (debug info: source lines 791-793). Per iteration:
//   idx        = *%rd491                       (64-bit index)
//   *%rd492    = ((f64*)%rd93)[idx]            (copy one f64)
//   s          = *%rd493                       (f64 scale factor)
//   (a, b, c)  = three consecutive f64 at %rd94 + idx * 24
//   %fd262 += a * s;  %fd263 += b * s;  %fd264 += c * s   (fused multiply-add)
$L__BB4_82: // %omp.inner.for.body.i6669.i.i
// Parent Loop BB4_15 Depth=1
// Parent Loop BB4_81 Depth=2
// => This Inner Loop Header: Depth=3
.loc 1 791 26 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:791:26
ld.u64 %rd330, [%rd491];
.loc 1 792 37 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:792:37
shl.b64 %rd331, %rd330, 3;
add.s64 %rd332, %rd93, %rd331;
ld.f64 %fd145, [%rd332];
.loc 1 792 35 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:792:35
st.f64 [%rd492], %fd145;
.loc 1 793 95 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:793:95
mul.lo.s64 %rd333, %rd330, 24;
add.s64 %rd334, %rd94, %rd333;
.loc 1 793 93 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:793:93
ld.f64 %fd146, [%rd493];
$L__tmp158:
.loc 3 297 34 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:297:34
ld.f64 %fd147, [%rd334];
.loc 3 297 51 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:297:51
ld.f64 %fd148, [%rd334+8];
.loc 3 297 68 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:297:68
ld.f64 %fd149, [%rd334+16];
$L__tmp159:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
fma.rn.f64 %fd262, %fd147, %fd146, %fd262;
$L__tmp160:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
fma.rn.f64 %fd263, %fd148, %fd146, %fd263;
$L__tmp161:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
fma.rn.f64 %fd264, %fd149, %fd146, %fd264;
$L__tmp162:
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// Advance: counter +1, matrix cursor by one row (%rd96 bytes), the other
// two cursors by one 8-byte element. Repeat while counter < %rd488
// (unsigned); the test is at the bottom, so the body runs at least once.
add.s64 %rd494, %rd494, 1;
add.s64 %rd493, %rd493, %rd96;
add.s64 %rd492, %rd492, 8;
add.s64 %rd491, %rd491, 8;
setp.lt.u64 %p71, %rd494, %rd488;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p71 bra $L__BB4_82;
// %bb.83: // %omp.dispatch.inc.i6693.i.i
// in Loop: Header=BB4_81 Depth=2
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// Step to the next chunk: start index += stride (%rd91); the chunk's last
// index becomes min(previous last + stride, %rd89) and %rd488 is that + 1.
add.s64 %rd490, %rd490, %rd91;
add.s64 %rd335, %rd489, %rd91;
.loc 1 789 12 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:12
min.u64 %rd489, %rd335, %rd89;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
add.s64 %rd488, %rd489, 1;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// Move the three chunk-level cursors forward by one stride.
add.s64 %rd487, %rd487, %rd97;
add.s64 %rd486, %rd486, %rd99;
add.s64 %rd485, %rd485, %rd99;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// Continue only while the new start index is still inside the clamped
// range (unsigned compare).
setp.lt.u64 %p72, %rd490, %rd488;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p72 bra $L__BB4_81;
$L__tmp163:
// Post-loop: spill the three partial sums to the 24-byte local buffer at
// %rd4 and pick the reduction strategy from %r228 (low 32 bits of the
// stride %rd91).
$L__BB4_84: // %omp.dispatch.end.i6699.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
cvt.u32.u64 %r228, %rd91;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
st.local.f64 [%rd4], %fd262;
st.local.f64 [%rd4+8], %fd263;
st.local.f64 [%rd4+16], %fd264;
$L__tmp164:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// %r228 == 1: bypass the shuffle reduction entirely (-> BB4_118).
setp.eq.s32 %p73, %r228, 1;
@%p73 bra $L__BB4_118;
// %bb.85: // %if.end5.i.i.i6711.i.i
// in Loop: Header=BB4_15 Depth=1
// Keep raw 64-bit images of the partial sums for the shuffle code:
//   %rd496 = bits(%fd262)  %rd495 = bits(%fd263)  %rd497 = bits(%fd264)
mov.b64 %rd497, %fd264;
mov.b64 %rd496, %fd262;
mov.b64 %rd495, %fd263;
// %r481 = ceil(%r228 / 32), %r479 = %r228 mod 32.
// Take the variable-offset path (BB4_87) when %r228 is not a multiple of 32
// AND %r4 >= %r481 - 1 (unsigned); otherwise fall through to the unrolled
// 16/8/4/2/1 shuffle sequence. %r4 is defined outside this excerpt
// (presumably the warp index - TODO confirm).
add.s32 %r33, %r228, 31;
shr.u32 %r481, %r33, 5;
and.b32 %r479, %r228, 31;
setp.ne.s32 %p74, %r479, 0;
add.s32 %r230, %r481, -1;
setp.ge.u32 %p75, %r4, %r230;
and.pred %p76, %p74, %p75;
@%p76 bra $L__BB4_87;
// Unrolled shuffle reduction of three f64 values with offsets 16, 8, 4, 2, 1.
// Each f64 is split into two 32-bit halves, both halves are exchanged with
// shfl.sync.down (clamp operand 31, member mask -1), re-packed, and added to
// the lane's own value. Inputs: %rd496/%rd495/%rd497 (bit images of
// %fd262/%fd263/%fd264). Outputs: %fd262, %fd263 and local [%rd4 .. %rd4+16].
// %bb.86: // %for.body.i.i.preheader.i.i6849.i.i
// in Loop: Header=BB4_15 Depth=1
$L__tmp165:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// ---- offset 16: fetch all three components from lane + 16 ----
// begin inline asm
mov.b64 {%r244,%r245}, %rd496;
// end inline asm
shfl.sync.down.b32 %r247, %r245, 16, 31, -1;
shfl.sync.down.b32 %r246, %r244, 16, 31, -1;
// begin inline asm
mov.b64 %rd343, {%r246,%r247};
// end inline asm
// begin inline asm
mov.b64 {%r248,%r249}, %rd495;
// end inline asm
shfl.sync.down.b32 %r251, %r249, 16, 31, -1;
shfl.sync.down.b32 %r250, %r248, 16, 31, -1;
// begin inline asm
mov.b64 %rd345, {%r250,%r251};
// end inline asm
// begin inline asm
mov.b64 {%r252,%r253}, %rd497;
// end inline asm
shfl.sync.down.b32 %r255, %r253, 16, 31, -1;
shfl.sync.down.b32 %r254, %r252, 16, 31, -1;
// begin inline asm
mov.b64 %rd347, {%r254,%r255};
// end inline asm
mov.b64 %fd159, %rd345;
mov.b64 %fd160, %rd343;
$L__tmp166:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// Add the fetched values to the lane's own partial sums.
add.f64 %fd161, %fd262, %fd160;
$L__tmp167:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd162, %fd263, %fd159;
$L__tmp168:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %fd163, %rd347;
$L__tmp169:
add.f64 %fd164, %fd264, %fd163;
$L__tmp170:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// ---- offset 8 ----
mov.b64 %rd348, %fd161;
// begin inline asm
mov.b64 {%r256,%r257}, %rd348;
// end inline asm
shfl.sync.down.b32 %r259, %r257, 8, 31, -1;
shfl.sync.down.b32 %r258, %r256, 8, 31, -1;
// begin inline asm
mov.b64 %rd349, {%r258,%r259};
// end inline asm
mov.b64 %rd350, %fd162;
// begin inline asm
mov.b64 {%r260,%r261}, %rd350;
// end inline asm
shfl.sync.down.b32 %r263, %r261, 8, 31, -1;
shfl.sync.down.b32 %r262, %r260, 8, 31, -1;
// begin inline asm
mov.b64 %rd351, {%r262,%r263};
// end inline asm
mov.b64 %rd352, %fd164;
// begin inline asm
mov.b64 {%r264,%r265}, %rd352;
// end inline asm
shfl.sync.down.b32 %r267, %r265, 8, 31, -1;
shfl.sync.down.b32 %r266, %r264, 8, 31, -1;
// begin inline asm
mov.b64 %rd353, {%r266,%r267};
// end inline asm
mov.b64 %fd165, %rd353;
mov.b64 %fd166, %rd351;
mov.b64 %fd167, %rd349;
$L__tmp171:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd168, %fd161, %fd167;
$L__tmp172:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd169, %fd162, %fd166;
$L__tmp173:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd170, %fd164, %fd165;
$L__tmp174:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// ---- offset 4 ----
mov.b64 %rd354, %fd168;
// begin inline asm
mov.b64 {%r268,%r269}, %rd354;
// end inline asm
shfl.sync.down.b32 %r271, %r269, 4, 31, -1;
shfl.sync.down.b32 %r270, %r268, 4, 31, -1;
// begin inline asm
mov.b64 %rd355, {%r270,%r271};
// end inline asm
mov.b64 %rd356, %fd169;
// begin inline asm
mov.b64 {%r272,%r273}, %rd356;
// end inline asm
shfl.sync.down.b32 %r275, %r273, 4, 31, -1;
shfl.sync.down.b32 %r274, %r272, 4, 31, -1;
// begin inline asm
mov.b64 %rd357, {%r274,%r275};
// end inline asm
mov.b64 %rd358, %fd170;
// begin inline asm
mov.b64 {%r276,%r277}, %rd358;
// end inline asm
shfl.sync.down.b32 %r279, %r277, 4, 31, -1;
shfl.sync.down.b32 %r278, %r276, 4, 31, -1;
// begin inline asm
mov.b64 %rd359, {%r278,%r279};
// end inline asm
mov.b64 %fd171, %rd359;
mov.b64 %fd172, %rd357;
mov.b64 %fd173, %rd355;
$L__tmp175:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd174, %fd168, %fd173;
$L__tmp176:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd175, %fd169, %fd172;
$L__tmp177:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd176, %fd170, %fd171;
$L__tmp178:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// ---- offset 2 ----
mov.b64 %rd360, %fd174;
// begin inline asm
mov.b64 {%r280,%r281}, %rd360;
// end inline asm
shfl.sync.down.b32 %r283, %r281, 2, 31, -1;
shfl.sync.down.b32 %r282, %r280, 2, 31, -1;
// begin inline asm
mov.b64 %rd361, {%r282,%r283};
// end inline asm
mov.b64 %rd362, %fd175;
// begin inline asm
mov.b64 {%r284,%r285}, %rd362;
// end inline asm
shfl.sync.down.b32 %r287, %r285, 2, 31, -1;
shfl.sync.down.b32 %r286, %r284, 2, 31, -1;
// begin inline asm
mov.b64 %rd363, {%r286,%r287};
// end inline asm
mov.b64 %rd364, %fd176;
// begin inline asm
mov.b64 {%r288,%r289}, %rd364;
// end inline asm
shfl.sync.down.b32 %r291, %r289, 2, 31, -1;
shfl.sync.down.b32 %r290, %r288, 2, 31, -1;
// begin inline asm
mov.b64 %rd365, {%r290,%r291};
// end inline asm
mov.b64 %fd177, %rd365;
mov.b64 %fd178, %rd363;
mov.b64 %fd179, %rd361;
$L__tmp179:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd180, %fd174, %fd179;
$L__tmp180:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd181, %fd175, %fd178;
$L__tmp181:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd182, %fd176, %fd177;
$L__tmp182:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// ---- offset 1 ----
mov.b64 %rd366, %fd180;
// begin inline asm
mov.b64 {%r292,%r293}, %rd366;
// end inline asm
shfl.sync.down.b32 %r295, %r293, 1, 31, -1;
shfl.sync.down.b32 %r294, %r292, 1, 31, -1;
// begin inline asm
mov.b64 %rd367, {%r294,%r295};
// end inline asm
mov.b64 %rd368, %fd181;
// begin inline asm
mov.b64 {%r296,%r297}, %rd368;
// end inline asm
shfl.sync.down.b32 %r299, %r297, 1, 31, -1;
shfl.sync.down.b32 %r298, %r296, 1, 31, -1;
// begin inline asm
mov.b64 %rd369, {%r298,%r299};
// end inline asm
mov.b64 %rd370, %fd182;
// begin inline asm
mov.b64 {%r300,%r301}, %rd370;
// end inline asm
shfl.sync.down.b32 %r303, %r301, 1, 31, -1;
shfl.sync.down.b32 %r302, %r300, 1, 31, -1;
// begin inline asm
mov.b64 %rd371, {%r302,%r303};
// end inline asm
mov.b64 %fd183, %rd371;
mov.b64 %fd184, %rd369;
mov.b64 %fd185, %rd367;
$L__tmp183:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// Final sums: components 0 and 1 are written to %fd262/%fd263 and to the
// local buffer; component 2 goes to %fd186 and local [%rd4+16] only
// (%fd264 is not updated here).
add.f64 %fd262, %fd180, %fd185;
st.local.f64 [%rd4], %fd262;
$L__tmp184:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd263, %fd181, %fd184;
st.local.f64 [%rd4+8], %fd263;
$L__tmp185:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd186, %fd182, %fd183;
st.local.f64 [%rd4+16], %fd186;
$L__tmp186:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
bra.uni $L__BB4_93;
// Variable-offset shuffle reduction over %r475 values (first reduction
// site). Fewer than 2 values: nothing to combine (-> BB4_48). Otherwise the
// initial offset is %r475 / 2. %r475 and the carried images
// %rd472/%rd473/%rd474 are initialised outside this excerpt.
$L__BB4_42: // %if.then15.i.i.i1037.i.i
// in Loop: Header=BB4_15 Depth=1
setp.lt.u32 %p40, %r475, 2;
@%p40 bra $L__BB4_48;
// %bb.43: // %while.body.lr.ph.i.i.i.i1039.i.i
// in Loop: Header=BB4_15 Depth=1
shr.u32 %r474, %r475, 1;
bra.uni $L__BB4_44;
// Loop latch: with n = %r475, the next count is (n + 1) / 2 and the next
// offset is (n + 1) / 4. The values produced this round (%rd475..%rd477)
// become the carried values (%rd473, %rd472, %rd474). Loop again while the
// old count was > 2; otherwise exit to BB4_47.
$L__BB4_46: // %_omp_reduction_shuffle_and_reduce_func1.exit.i.i.i.i
// in Loop: Header=BB4_44 Depth=2
add.s32 %r133, %r475, 1;
shr.u32 %r24, %r133, 1;
shr.u32 %r474, %r133, 2;
setp.gt.u32 %p42, %r475, 2;
mov.u64 %rd472, %rd476;
mov.u64 %rd473, %rd475;
mov.u64 %rd474, %rd477;
mov.u32 %r475, %r24;
@%p42 bra $L__BB4_44;
bra.uni $L__BB4_47;
// Loop body: fetch the three 64-bit images from the lane %r474 positions
// higher (two 32-bit shfl.sync.down per value, clamp 31, member mask -1)
// into %rd475/%rd476/%rd477.
$L__BB4_44: // %while.body.i.i.i.i1042.i.i
// Parent Loop BB4_15 Depth=1
// => This Inner Loop Header: Depth=2
cvt.u16.u32 %rs5, %r474;
$L__tmp187:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// begin inline asm
mov.b64 {%r121,%r122}, %rd473;
// end inline asm
shfl.sync.down.b32 %r124, %r122, %r474, 31, -1;
shfl.sync.down.b32 %r123, %r121, %r474, 31, -1;
// begin inline asm
mov.b64 %rd475, {%r123,%r124};
// end inline asm
// begin inline asm
mov.b64 {%r125,%r126}, %rd472;
// end inline asm
shfl.sync.down.b32 %r128, %r126, %r474, 31, -1;
shfl.sync.down.b32 %r127, %r125, %r474, 31, -1;
// begin inline asm
mov.b64 %rd476, {%r127,%r128};
// end inline asm
// begin inline asm
mov.b64 {%r129,%r130}, %rd474;
// end inline asm
shfl.sync.down.b32 %r132, %r130, %r474, 31, -1;
shfl.sync.down.b32 %r131, %r129, %r474, 31, -1;
// begin inline asm
mov.b64 %rd477, {%r131,%r132};
// end inline asm
// Lanes with %rs2 >= offset (unsigned 16-bit) keep the fetched values
// as-is; the rest add their own values to them. %rs2 is defined outside
// this excerpt (presumably the lane id - TODO confirm).
setp.ge.u16 %p41, %rs2, %rs5;
@%p41 bra $L__BB4_46;
// %bb.45: // %then.i.i.i.i.i
// in Loop: Header=BB4_44 Depth=2
mov.b64 %fd91, %rd474;
mov.b64 %fd92, %rd472;
mov.b64 %fd93, %rd473;
mov.b64 %fd94, %rd477;
mov.b64 %fd95, %rd476;
mov.b64 %fd96, %rd475;
$L__tmp188:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd97, %fd96, %fd93;
$L__tmp189:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd475, %fd97;
$L__tmp190:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd98, %fd95, %fd92;
$L__tmp191:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd476, %fd98;
$L__tmp192:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd99, %fd94, %fd91;
mov.b64 %rd477, %fd99;
bra.uni $L__BB4_46;
$L__tmp193:
// Variable-offset shuffle reduction over %r479 values (second reduction
// site; %r479 = %r228 mod 32 on entry from bb.85). Fewer than 2 values:
// nothing to combine (-> BB4_93). Otherwise the initial offset is %r479 / 2.
$L__BB4_87: // %if.then15.i.i.i6723.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.lt.u32 %p77, %r479, 2;
@%p77 bra $L__BB4_93;
// %bb.88: // %while.body.lr.ph.i.i.i.i6725.i.i
// in Loop: Header=BB4_15 Depth=1
shr.u32 %r478, %r479, 1;
bra.uni $L__BB4_89;
// Loop latch: with n = %r479, the next count is (n + 1) / 2 and the next
// offset is (n + 1) / 4. This round's results (%rd498..%rd500) become the
// carried values (%rd496, %rd495, %rd497). Loop again while the old count
// was > 2; otherwise exit to BB4_92.
$L__BB4_91: // %_omp_reduction_shuffle_and_reduce_func1.exit.i.i6739.i.i
// in Loop: Header=BB4_89 Depth=2
add.s32 %r243, %r479, 1;
shr.u32 %r39, %r243, 1;
shr.u32 %r478, %r243, 2;
setp.gt.u32 %p79, %r479, 2;
mov.u64 %rd495, %rd499;
mov.u64 %rd496, %rd498;
mov.u64 %rd497, %rd500;
mov.u32 %r479, %r39;
@%p79 bra $L__BB4_89;
bra.uni $L__BB4_92;
// Loop body: fetch the three 64-bit images from the lane %r478 positions
// higher (two 32-bit shfl.sync.down per value, clamp 31, member mask -1)
// into %rd498/%rd499/%rd500.
$L__BB4_89: // %while.body.i.i.i.i6728.i.i
// Parent Loop BB4_15 Depth=1
// => This Inner Loop Header: Depth=2
cvt.u16.u32 %rs7, %r478;
$L__tmp194:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// begin inline asm
mov.b64 {%r231,%r232}, %rd496;
// end inline asm
shfl.sync.down.b32 %r234, %r232, %r478, 31, -1;
shfl.sync.down.b32 %r233, %r231, %r478, 31, -1;
// begin inline asm
mov.b64 %rd498, {%r233,%r234};
// end inline asm
// begin inline asm
mov.b64 {%r235,%r236}, %rd495;
// end inline asm
shfl.sync.down.b32 %r238, %r236, %r478, 31, -1;
shfl.sync.down.b32 %r237, %r235, %r478, 31, -1;
// begin inline asm
mov.b64 %rd499, {%r237,%r238};
// end inline asm
// begin inline asm
mov.b64 {%r239,%r240}, %rd497;
// end inline asm
shfl.sync.down.b32 %r242, %r240, %r478, 31, -1;
shfl.sync.down.b32 %r241, %r239, %r478, 31, -1;
// begin inline asm
mov.b64 %rd500, {%r241,%r242};
// end inline asm
// Lanes with %rs2 >= offset (unsigned 16-bit) keep the fetched values
// as-is; the rest add their own values to them. %rs2 is defined outside
// this excerpt (presumably the lane id - TODO confirm).
setp.ge.u16 %p78, %rs2, %rs7;
@%p78 bra $L__BB4_91;
// %bb.90: // %then.i.i.i6845.i.i
// in Loop: Header=BB4_89 Depth=2
mov.b64 %fd150, %rd497;
mov.b64 %fd151, %rd495;
mov.b64 %fd152, %rd496;
mov.b64 %fd153, %rd500;
mov.b64 %fd154, %rd499;
mov.b64 %fd155, %rd498;
$L__tmp195:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd156, %fd155, %fd152;
$L__tmp196:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd498, %fd156;
$L__tmp197:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd157, %fd154, %fd151;
$L__tmp198:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd499, %fd157;
$L__tmp199:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd158, %fd153, %fd150;
mov.b64 %rd500, %fd158;
bra.uni $L__BB4_91;
$L__tmp200:
// Exit of the BB4_44 shuffle loop: write all three results to the local
// buffer at %rd4 and mirror the first two in %fd246/%fd247. The third
// component is kept in local memory only.
$L__BB4_47: // %if.end22.i.i.loopexit.i.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
st.local.u64 [%rd4], %rd475;
st.local.u64 [%rd4+8], %rd476;
$L__tmp201:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
mov.b64 %fd247, %rd476;
mov.b64 %fd246, %rd475;
$L__tmp202:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
st.local.u64 [%rd4+16], %rd477;
$L__tmp203:
// Join point after the first-level reduction.
//   %r14 = (%r1 == %r3) ? (%p26 ? 0 : %r1) : %r1
// %r14 is later compared with 0 to select who updates the shared
// accumulator. %r1, %r3 and %p26 are defined outside this excerpt.
// When %r473 < 33 the second level is skipped (-> BB4_71); otherwise the
// byte offset %rd478 is zeroed and the copy loop at BB4_50 is entered.
$L__BB4_48: // %if.end22.i.i.i1049.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:59
setp.eq.s32 %p33, %r1, %r3;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
selp.b32 %r117, 0, %r1, %p26;
selp.b32 %r14, %r117, %r1, %p33;
setp.lt.u32 %p43, %r473, 33;
@%p43 bra $L__BB4_71;
// %bb.49: // %if.then25.i.i.i1053.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u64 %rd478, 0;
bra.uni $L__BB4_50;
// Copy loop that moves the 24-byte local buffer at %rd4 through shared
// memory one 32-bit word at a time. %rd478 is the byte offset; it steps by 4
// and the loop ends when it reaches 24 (six words = three f64 values).
// Loop latch:
$L__BB4_64: // %ifcont4.i.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
$L__tmp204:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
add.s64 %rd478, %rd478, 4;
cvt.u32.u64 %r206, %rd478;
setp.ne.s32 %p58, %r206, 24;
@%p58 bra $L__BB4_50;
bra.uni $L__BB4_65;
// Loop body, step 1: synchronise before publishing a word. Three variants:
//   IsSPMDMode != 0                      -> bar.sync 0
//   else if !(%p82 || TeamState_$_0 != 0) -> membar.gl only
//   else                                  -> barrier.sync 7 with a thread count
// %p82 is defined outside this excerpt.
$L__BB4_50: // %body.i.i.i.i.i
// Parent Loop BB4_15 Depth=1
// => This Inner Loop Header: Depth=2
ld.shared.u32 %r194, [IsSPMDMode];
setp.ne.s32 %p44, %r194, 0;
@%p44 bra $L__BB4_53;
bra.uni $L__BB4_51;
$L__BB4_53: // %if.then2.i.i.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
bar.sync 0;
bra.uni $L__BB4_55;
$L__BB4_51: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i.i.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
ld.shared.u32 %r26, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p46, %r26, 0;
or.pred %p47, %p82, %p46;
@%p47 bra $L__BB4_54;
// %bb.52: // %if.then.i.i.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
membar.gl;
bra.uni $L__BB4_55;
// Thread count for the named barrier:
//   TeamState_$_0 == 1 ? (TeamState_$_2 == 0 ? %r6 : TeamState_$_2) : 1
$L__BB4_54: // %if.else.i.i.i.i.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
setp.eq.s32 %p48, %r26, 1;
ld.shared.u32 %r197, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p49, %r197, 0;
selp.b32 %r198, %r6, %r197, %p49;
selp.b32 %r196, %r198, 1, %p48;
mov.u32 %r195, 7;
// begin inline asm
barrier.sync %r195, %r196;
// end inline asm
// Step 2: threads for which %p87 is false copy the current word from the
// local buffer to the shared slot at %rd21 (volatile store). %p87 and %rd21
// are computed outside this excerpt.
$L__BB4_55: // %__kmpc_barrier.exit.i.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
add.s64 %rd72, %rd4, %rd478;
@%p87 bra $L__BB4_57;
// %bb.56: // %then.i43.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
ld.local.u32 %r199, [%rd72];
st.volatile.shared.u32 [%rd21], %r199;
// Step 3: second synchronisation, same three variants as step 1.
$L__BB4_57: // %ifcont.i42.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
ld.shared.u32 %r200, [IsSPMDMode];
setp.ne.s32 %p51, %r200, 0;
@%p51 bra $L__BB4_60;
bra.uni $L__BB4_58;
$L__BB4_60: // %if.then2.i2.i.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
bar.sync 0;
bra.uni $L__BB4_62;
$L__BB4_58: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i3.i.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
ld.shared.u32 %r27, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p53, %r27, 0;
or.pred %p54, %p82, %p53;
@%p54 bra $L__BB4_61;
// %bb.59: // %if.then.i17.i.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
membar.gl;
bra.uni $L__BB4_62;
$L__BB4_61: // %if.else.i.i.i9.i.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
setp.eq.s32 %p55, %r27, 1;
ld.shared.u32 %r203, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p56, %r203, 0;
selp.b32 %r204, %r6, %r203, %p56;
selp.b32 %r202, %r204, 1, %p55;
mov.u32 %r201, 7;
// begin inline asm
barrier.sync %r201, %r202;
// end inline asm
$L__tmp205:
// Step 4: threads with %r1 < %r477 (unsigned) read the shared slot at %rd22
// (volatile load) and overwrite the same word of their own local buffer.
// Only local memory is updated here; registers holding earlier copies of
// the buffer are not refreshed.
$L__BB4_62: // %__kmpc_barrier.exit18.i.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.ge.u32 %p57, %r1, %r477;
$L__tmp206:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
@%p57 bra $L__BB4_64;
// %bb.63: // %then2.i.i.i.i.i
// in Loop: Header=BB4_50 Depth=2
ld.volatile.shared.u32 %r205, [%rd22];
st.local.u32 [%rd72], %r205;
bra.uni $L__BB4_64;
$L__tmp207:
// After the copy loop: threads with %r1 > 31, or any thread when %r18 < 64,
// skip the second shuffle pass and go straight to BB4_71. %r18 is defined
// outside this excerpt.
$L__BB4_65: // %_omp_reduction_inter_warp_copy_func2.exit.i.i1054.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:59
setp.gt.u32 %p59, %r1, 31;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.lt.u32 %p60, %r18, 64;
or.pred %p61, %p59, %p60;
@%p61 bra $L__BB4_71;
// %bb.66: // %while.body.lr.ph.i49.i.i.i1058.i.i
// in Loop: Header=BB4_15 Depth=1
// Second shuffle pass setup: initial offset %r476 = %r18 / 64, %rs3 = low
// 16 bits of %r14, and the three values are reloaded from the local buffer
// (which the copy loop may have rewritten).
shr.u32 %r476, %r18, 6;
cvt.u16.u32 %rs3, %r14;
$L__tmp208:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
ld.local.u64 %rd481, [%rd4+16];
$L__tmp209:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
ld.local.u64 %rd480, [%rd4];
ld.local.u64 %rd479, [%rd4+8];
bra.uni $L__BB4_67;
$L__tmp210:
// Loop latch: with n = %r477, the next count is (n + 1) / 2 and the next
// offset is (n + 1) / 4. This round's results (%rd482..%rd484) become the
// carried values (%rd480, %rd479, %rd481). Loop again while the old count
// was > 2; otherwise exit to BB4_70.
$L__BB4_69: // %_omp_reduction_shuffle_and_reduce_func1.exit243.i.i.i.i
// in Loop: Header=BB4_67 Depth=2
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
add.s32 %r220, %r477, 1;
shr.u32 %r31, %r220, 1;
shr.u32 %r476, %r220, 2;
setp.gt.u32 %p63, %r477, 2;
mov.u64 %rd479, %rd483;
mov.u64 %rd480, %rd482;
mov.u64 %rd481, %rd484;
mov.u32 %r477, %r31;
@%p63 bra $L__BB4_67;
bra.uni $L__BB4_70;
// Loop body: the shuffle offset %r219 is the low 16 bits of %r476
// sign-extended to 32 bits. Each of the three 64-bit values is fetched from
// the lane %r219 positions higher as two 32-bit halves.
$L__BB4_67: // %while.body.i52.i.i.i1062.i.i
// Parent Loop BB4_15 Depth=1
// => This Inner Loop Header: Depth=2
cvt.u16.u32 %rs6, %r476;
cvt.s32.s16 %r219, %r476;
$L__tmp211:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// begin inline asm
mov.b64 {%r207,%r208}, %rd480;
// end inline asm
shfl.sync.down.b32 %r210, %r208, %r219, 31, -1;
shfl.sync.down.b32 %r209, %r207, %r219, 31, -1;
// begin inline asm
mov.b64 %rd482, {%r209,%r210};
// end inline asm
// begin inline asm
mov.b64 {%r211,%r212}, %rd479;
// end inline asm
shfl.sync.down.b32 %r214, %r212, %r219, 31, -1;
shfl.sync.down.b32 %r213, %r211, %r219, 31, -1;
// begin inline asm
mov.b64 %rd483, {%r213,%r214};
// end inline asm
// begin inline asm
mov.b64 {%r215,%r216}, %rd481;
// end inline asm
shfl.sync.down.b32 %r218, %r216, %r219, 31, -1;
shfl.sync.down.b32 %r217, %r215, %r219, 31, -1;
// begin inline asm
mov.b64 %rd484, {%r217,%r218};
// end inline asm
// Threads with %rs3 >= offset (unsigned 16-bit) keep the fetched values
// as-is; the rest add their own values to them.
setp.ge.u16 %p62, %rs3, %rs6;
@%p62 bra $L__BB4_69;
// %bb.68: // %then.i237.i.i.i.i
// in Loop: Header=BB4_67 Depth=2
mov.b64 %fd128, %rd481;
mov.b64 %fd129, %rd479;
mov.b64 %fd130, %rd480;
mov.b64 %fd131, %rd484;
mov.b64 %fd132, %rd483;
mov.b64 %fd133, %rd482;
$L__tmp212:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd134, %fd133, %fd130;
$L__tmp213:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd482, %fd134;
$L__tmp214:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd135, %fd132, %fd129;
$L__tmp215:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd483, %fd135;
$L__tmp216:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd136, %fd131, %fd128;
mov.b64 %rd484, %fd136;
bra.uni $L__BB4_69;
$L__tmp217:
// Exit of the BB4_67 shuffle loop: the first two results are handed on in
// %fd246/%fd247 (bit copies of %rd482/%rd483); the third (%rd484) is written
// to local [%rd4+16]. Local [%rd4] and [%rd4+8] are not rewritten here.
$L__BB4_70: // %if.end29.i.i.loopexit.i.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
mov.b64 %fd247, %rd483;
mov.b64 %fd246, %rd482;
$L__tmp218:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
st.local.u64 [%rd4+16], %rd484;
$L__tmp219:
// Only threads with %r14 == 0 fold the result into the shared accumulator;
// all others go directly to the barrier at BB4_74.
$L__BB4_71: // %if.end29.i.i.i1051.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.ne.s32 %p64, %r14, 0;
@%p64 bra $L__BB4_74;
// %bb.72: // %if.end29.i.i..omp.reduction.then_crit_edge.i.i.i
// in Loop: Header=BB4_15 Depth=1
$L__tmp220:
.loc 3 112 5 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:112:5
// The third component is reloaded from local memory, while the first two
// are taken from %fd246/%fd247 as left by whichever predecessor reached
// BB4_71.
// NOTE(review): BB4_71 is also a direct target of BB4_48 and BB4_65. On the
// BB4_65 edge the copy loop at BB4_50 may already have overwritten local
// [%rd4]/[%rd4+8] without refreshing %fd246/%fd247, so registers and local
// memory can disagree there. Confirm whether a thread with %r14 == 0 can
// take that edge (depends on %r1/%r3/%r18/%p26, defined outside this excerpt).
ld.local.f64 %fd248, [%rd4+16];
// ratioGradRef_local_shared[k] += component k, for k = 0, 1, 2
// (plain shared-memory load / add / store, no atomics).
$L__BB4_73: // %.omp.reduction.then.i1052.i.i
// in Loop: Header=BB4_15 Depth=1
$L__tmp221:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
ld.shared.f64 %fd137, [ratioGradRef_local_shared];
add.f64 %fd138, %fd137, %fd246;
st.shared.f64 [ratioGradRef_local_shared], %fd138;
$L__tmp222:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
ld.shared.f64 %fd139, [ratioGradRef_local_shared+8];
add.f64 %fd140, %fd139, %fd247;
st.shared.f64 [ratioGradRef_local_shared+8], %fd140;
$L__tmp223:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
ld.shared.f64 %fd141, [ratioGradRef_local_shared+16];
add.f64 %fd142, %fd141, %fd248;
st.shared.f64 [ratioGradRef_local_shared+16], %fd142;
$L__tmp224:
// Rejoin: barrier 0, then threads for which %p20 is false reset the three
// TeamState words (_$_0 = _$_1 = %r471, _$_2 = 1). %p20 and %r471 are defined
// outside this excerpt.
$L__BB4_74: // %if.end41.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
bar.sync 0;
@%p20 bra $L__BB4_76;
// %bb.75: // %if.then.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r471;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r471;
mov.u32 %r222, 1;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r222;
// Two more barrier-0 synchronisations, then continue at BB4_121.
$L__BB4_76: // %_ZN4ompx5state9ValueRAIIINS0_5ValueIjLNS0_9ValueKindE7EEEjED1Ev.exit.i.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
bar.sync 0;
bra.uni $L__BB4_121;
// Exit of the BB4_89 shuffle loop: write all three results to the local
// buffer at %rd4 and mirror the first two in %fd262/%fd263. The third
// component is kept in local memory only.
$L__BB4_92: // %if.end22.i.i.loopexit.i6747.i.i
// in Loop: Header=BB4_15 Depth=1
$L__tmp225:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
st.local.u64 [%rd4], %rd498;
st.local.u64 [%rd4+8], %rd499;
$L__tmp226:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
mov.b64 %fd263, %rd499;
mov.b64 %fd262, %rd498;
$L__tmp227:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
st.local.u64 [%rd4+16], %rd500;
$L__tmp228:
// Join point after the first-level reduction: when %r228 < 33 the second
// level is skipped (-> BB4_116); otherwise the byte offset %rd501 is zeroed
// and the copy loop at BB4_95 is entered.
$L__BB4_93: // %if.end22.i.i.i6748.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.lt.u32 %p80, %r228, 33;
@%p80 bra $L__BB4_116;
// %bb.94: // %if.then25.i.i.i6768.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u64 %rd501, 0;
bra.uni $L__BB4_95;
// Copy loop that moves the 24-byte local buffer at %rd4 through shared
// memory one 32-bit word at a time (second reduction site). %rd501 is the
// byte offset; it steps by 4 and the loop ends when it reaches 24.
// Loop latch:
$L__BB4_109: // %ifcont4.i.i.i6783.i.i
// in Loop: Header=BB4_95 Depth=2
$L__tmp229:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
add.s64 %rd501, %rd501, 4;
cvt.u32.u64 %r317, %rd501;
setp.ne.s32 %p95, %r317, 24;
@%p95 bra $L__BB4_95;
bra.uni $L__BB4_110;
// Loop body, step 1: synchronise before publishing a word. Three variants:
//   IsSPMDMode != 0                      -> bar.sync 0
//   else if !(%p82 || TeamState_$_0 != 0) -> membar.gl only
//   else                                  -> barrier.sync 7 with a thread count
// %p82 is defined outside this excerpt.
$L__BB4_95: // %body.i.i.i6774.i.i
// Parent Loop BB4_15 Depth=1
// => This Inner Loop Header: Depth=2
ld.shared.u32 %r305, [IsSPMDMode];
setp.ne.s32 %p81, %r305, 0;
@%p81 bra $L__BB4_98;
bra.uni $L__BB4_96;
$L__BB4_98: // %if.then2.i.i.i.i6777.i.i
// in Loop: Header=BB4_95 Depth=2
bar.sync 0;
bra.uni $L__BB4_100;
$L__BB4_96: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i.i.i.i6834.i.i
// in Loop: Header=BB4_95 Depth=2
ld.shared.u32 %r41, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p83, %r41, 0;
or.pred %p84, %p82, %p83;
@%p84 bra $L__BB4_99;
// %bb.97: // %if.then.i.i.i.i6844.i.i
// in Loop: Header=BB4_95 Depth=2
membar.gl;
bra.uni $L__BB4_100;
// Thread count for the named barrier:
//   TeamState_$_0 == 1 ? (TeamState_$_2 == 0 ? %r6 : TeamState_$_2) : 1
$L__BB4_99: // %if.else.i.i.i.i.i.i6837.i.i
// in Loop: Header=BB4_95 Depth=2
setp.eq.s32 %p85, %r41, 1;
ld.shared.u32 %r308, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p86, %r308, 0;
selp.b32 %r309, %r6, %r308, %p86;
selp.b32 %r307, %r309, 1, %p85;
mov.u32 %r306, 7;
// begin inline asm
barrier.sync %r306, %r307;
// end inline asm
// Step 2: threads for which %p87 is false copy the current word from the
// local buffer to the shared slot at %rd21 (volatile store). %p87 and %rd21
// are computed outside this excerpt.
$L__BB4_100: // %__kmpc_barrier.exit.i.i.i6778.i.i
// in Loop: Header=BB4_95 Depth=2
add.s64 %rd137, %rd4, %rd501;
@%p87 bra $L__BB4_102;
// %bb.101: // %then.i43.i.i6833.i.i
// in Loop: Header=BB4_95 Depth=2
ld.local.u32 %r310, [%rd137];
st.volatile.shared.u32 [%rd21], %r310;
// Step 3: second synchronisation, same three variants as step 1.
$L__BB4_102: // %ifcont.i42.i.i6779.i.i
// in Loop: Header=BB4_95 Depth=2
ld.shared.u32 %r311, [IsSPMDMode];
setp.ne.s32 %p88, %r311, 0;
@%p88 bra $L__BB4_105;
bra.uni $L__BB4_103;
$L__BB4_105: // %if.then2.i2.i.i.i6781.i.i
// in Loop: Header=BB4_95 Depth=2
bar.sync 0;
bra.uni $L__BB4_107;
$L__BB4_103: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i3.i.i.i6822.i.i
// in Loop: Header=BB4_95 Depth=2
ld.shared.u32 %r42, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p90, %r42, 0;
or.pred %p91, %p82, %p90;
@%p91 bra $L__BB4_106;
// %bb.104: // %if.then.i17.i.i.i6832.i.i
// in Loop: Header=BB4_95 Depth=2
membar.gl;
bra.uni $L__BB4_107;
$L__BB4_106: // %if.else.i.i.i9.i.i.i6825.i.i
// in Loop: Header=BB4_95 Depth=2
setp.eq.s32 %p92, %r42, 1;
ld.shared.u32 %r314, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p93, %r314, 0;
selp.b32 %r315, %r6, %r314, %p93;
selp.b32 %r313, %r315, 1, %p92;
mov.u32 %r312, 7;
// begin inline asm
barrier.sync %r312, %r313;
// end inline asm
$L__tmp230:
// Step 4: threads with %r1 < %r481 (unsigned) read the shared slot at %rd22
// (volatile load) and overwrite the same word of their own local buffer.
// Only local memory is updated here; registers holding earlier copies of
// the buffer are not refreshed.
$L__BB4_107: // %__kmpc_barrier.exit18.i.i.i6782.i.i
// in Loop: Header=BB4_95 Depth=2
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.ge.u32 %p94, %r1, %r481;
$L__tmp231:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
@%p94 bra $L__BB4_109;
// %bb.108: // %then2.i.i.i6821.i.i
// in Loop: Header=BB4_95 Depth=2
ld.volatile.shared.u32 %r316, [%rd22];
st.local.u32 [%rd137], %r316;
bra.uni $L__BB4_109;
$L__tmp232:
$L__BB4_110: // %_omp_reduction_inter_warp_copy_func2.exit.i.i6785.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:59
setp.gt.u32 %p96, %r1, 31;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.lt.u32 %p97, %r33, 64;
or.pred %p98, %p96, %p97;
@%p98 bra $L__BB4_116;
// %bb.111: // %while.body.lr.ph.i49.i.i.i6789.i.i
// in Loop: Header=BB4_15 Depth=1
shr.u32 %r480, %r33, 6;
$L__tmp233:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
ld.local.u64 %rd504, [%rd4+16];
$L__tmp234:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
ld.local.u64 %rd503, [%rd4];
ld.local.u64 %rd502, [%rd4+8];
bra.uni $L__BB4_112;
$L__tmp235:
$L__BB4_114: // %_omp_reduction_shuffle_and_reduce_func1.exit243.i.i6808.i.i
// in Loop: Header=BB4_112 Depth=2
// Latch of the shuffle-and-reduce loop whose header is $L__BB4_112.
// From the running count %r481 it derives:
//   %r46  = (%r481 + 1) >> 1   -> the count for the next round
//   %r480 = (%r481 + 1) >> 2   -> the shuffle distance for the next round
// The loop repeats only while the old count was > 2 (unsigned compare).
// The results of this round (%rd505, %rd506, %rd507) become the inputs of the
// next round (%rd503, %rd502, %rd504 respectively).
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
add.s32 %r331, %r481, 1;
shr.u32 %r46, %r331, 1;
shr.u32 %r480, %r331, 2;
setp.gt.u32 %p100, %r481, 2;
mov.u64 %rd502, %rd506;
mov.u64 %rd503, %rd505;
mov.u64 %rd504, %rd507;
mov.u32 %r481, %r46;
@%p100 bra $L__BB4_112;
bra.uni $L__BB4_115;
$L__BB4_112: // %while.body.i52.i.i.i6795.i.i
// Parent Loop BB4_15 Depth=1
// => This Inner Loop Header: Depth=2
// One reduction round over three 64-bit values (%rd503, %rd502, %rd504).
// %rs8 is the low 16 bits of the distance %r480; %r330 is the same 16 bits
// sign-extended to 32 bits and is used as the shuffle distance.
cvt.u16.u32 %rs8, %r480;
cvt.s32.s16 %r330, %r480;
$L__tmp236:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// Each 64-bit value is split into two 32-bit halves, each half is fetched with
// shfl.sync.down (distance %r330, clamp operand 31, member mask -1), and the
// two halves are packed back into a 64-bit register.
// begin inline asm
mov.b64 {%r318,%r319}, %rd503;
// end inline asm
shfl.sync.down.b32 %r321, %r319, %r330, 31, -1;
shfl.sync.down.b32 %r320, %r318, %r330, 31, -1;
// begin inline asm
mov.b64 %rd505, {%r320,%r321};
// end inline asm
// begin inline asm
mov.b64 {%r322,%r323}, %rd502;
// end inline asm
shfl.sync.down.b32 %r325, %r323, %r330, 31, -1;
shfl.sync.down.b32 %r324, %r322, %r330, 31, -1;
// begin inline asm
mov.b64 %rd506, {%r324,%r325};
// end inline asm
// begin inline asm
mov.b64 {%r326,%r327}, %rd504;
// end inline asm
shfl.sync.down.b32 %r329, %r327, %r330, 31, -1;
shfl.sync.down.b32 %r328, %r326, %r330, 31, -1;
// begin inline asm
mov.b64 %rd507, {%r328,%r329};
// end inline asm
// Threads with %rs1 >= distance (unsigned 16-bit compare) skip the add and keep
// the shuffled values as this round's result.
// NOTE(review): %rs1 is set outside this excerpt; presumably the lane id - confirm.
setp.ge.u16 %p99, %rs1, %rs8;
@%p99 bra $L__BB4_114;
// %bb.113: // %then.i237.i.i6817.i.i
// in Loop: Header=BB4_112 Depth=2
// Remaining threads combine: result = shuffled value + own value, for each of
// the three doubles (bit patterns are reinterpreted as f64 for the add).
mov.b64 %fd187, %rd504;
mov.b64 %fd188, %rd502;
mov.b64 %fd189, %rd503;
mov.b64 %fd190, %rd507;
mov.b64 %fd191, %rd506;
mov.b64 %fd192, %rd505;
$L__tmp237:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd193, %fd192, %fd189;
$L__tmp238:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd505, %fd193;
$L__tmp239:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd194, %fd191, %fd188;
$L__tmp240:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd506, %fd194;
$L__tmp241:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd195, %fd190, %fd187;
mov.b64 %rd507, %fd195;
bra.uni $L__BB4_114;
$L__tmp242:
$L__BB4_115: // %if.end29.i.i.loopexit.i6816.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
mov.b64 %fd263, %rd506;
mov.b64 %fd262, %rd505;
$L__tmp243:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
st.local.u64 [%rd4+16], %rd507;
$L__tmp244:
$L__BB4_116: // %if.end29.i.i.i6752.i.i
// in Loop: Header=BB4_15 Depth=1
// Final step of the reduction: only threads with %r1 == 0 fall through and add
// their partial result into ratioGradRef_local_shared; all other threads jump
// straight to $L__BB4_119.
// NOTE(review): %r1 is set outside this excerpt; presumably the thread index - confirm.
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
setp.ne.s32 %p101, %r1, 0;
$L__tmp245:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p101 bra $L__BB4_119;
// %bb.117: // %if.end29.i.i..omp.reduction.then_crit_edge.i6757.i.i
// in Loop: Header=BB4_15 Depth=1
$L__tmp246:
.loc 3 112 5 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:112:5
// Third component is reloaded from the thread-local buffer at %rd4+16; the first
// two components are already in %fd262 / %fd263.
ld.local.f64 %fd264, [%rd4+16];
$L__BB4_118: // %.omp.reduction.then.i6759.i.i
// in Loop: Header=BB4_15 Depth=1
// Plain (non-atomic) load / add / store on the three doubles stored at
// ratioGradRef_local_shared + 0, + 8 and + 16.
$L__tmp247:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
ld.shared.f64 %fd196, [ratioGradRef_local_shared];
add.f64 %fd197, %fd196, %fd262;
st.shared.f64 [ratioGradRef_local_shared], %fd197;
$L__tmp248:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
ld.shared.f64 %fd198, [ratioGradRef_local_shared+8];
add.f64 %fd199, %fd198, %fd263;
st.shared.f64 [ratioGradRef_local_shared+8], %fd199;
$L__tmp249:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
ld.shared.f64 %fd200, [ratioGradRef_local_shared+16];
add.f64 %fd201, %fd200, %fd264;
st.shared.f64 [ratioGradRef_local_shared+16], %fd201;
$L__tmp250:
$L__BB4_119: // %_ZN12_GLOBAL__N_115invokeMicrotaskEiiPvPS0_l.exit4408.i.i
// in Loop: Header=BB4_15 Depth=1
// Writes %r471 (defined outside this excerpt) to TeamState field _0.
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r471;
$L__BB4_121: // %__kmpc_parallel_51.exit.i
// in Loop: Header=BB4_15 Depth=1
// Source line 795: copies the three 64-bit words of ratioGradRef_local_shared to
// global memory at %rd3 + 24 * iw, where iw is the 64-bit value loaded from
// iw_shared5_$_0.
// NOTE(review): on the fall-through path from the accumulation above there is no
// bar.sync / barrier.sync between the st.shared to ratioGradRef_local_shared and
// the ld.shared below. This label is also a branch target whose predecessors are
// outside this excerpt, so whether a thread can read the buffer before another
// thread has finished accumulating cannot be decided from here - confirm against
// the full kernel.
.loc 1 795 29 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:795:29
ld.shared.u64 %rd379, [iw_shared5_$_0];
.loc 1 795 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:795:7
mul.lo.s64 %rd380, %rd379, 24;
add.s64 %rd381, %rd3, %rd380;
.loc 1 795 33 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:795:33
ld.shared.u64 %rd382, [ratioGradRef_local_shared+16];
st.global.u64 [%rd381+16], %rd382;
ld.shared.u64 %rd383, [ratioGradRef_local_shared+8];
st.global.u64 [%rd381+8], %rd383;
ld.shared.u64 %rd384, [ratioGradRef_local_shared];
st.global.u64 [%rd381], %rd384;
.loc 1 797 17 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:797:17
@%p6 bra $L__BB4_123;
// %bb.122: // %region.guarded12.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 17 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:17
mov.u64 %rd385, 0;
.loc 1 797 17 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:797:17
st.shared.u64 [c_ratio_shared8_$_0], %rd385;
$L__BB4_123: // %region.barrier10.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
ld.shared.u32 %r337, [IsSPMDMode];
setp.eq.s32 %p103, %r337, 0;
selp.b32 %r338, -32, 0, %p103;
add.s32 %r339, %r338, %r2;
setp.lt.u32 %p104, %r339, 32;
and.b32 %r340, %r339, -32;
selp.b32 %r48, 1, %r340, %p104;
selp.b32 %r341, %r48, %r2, %p103;
setp.eq.s32 %p105, %r341, %r339;
selp.b32 %r49, 0, %r341, %p105;
@%p103 bra $L__BB4_183;
$L__tmp251:
// %bb.124: // %if.then32.i47.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
setp.ne.s32 %p106, %r1, 0;
$L__tmp252:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
bar.sync 0;
@%p106 bra $L__BB4_126;
// %bb.125: // %if.else.i.i.i67.i
// in Loop: Header=BB4_15 Depth=1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r49;
mov.u32 %r342, 1;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r342;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r342;
$L__BB4_126: // %_ZN4ompx5state9ValueRAIIINS0_5ValueIjLNS0_9ValueKindE1EEEjEC1ERS4_jjbP7IdentTyb.exit.i49.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
bar.sync 0;
add.s32 %r343, %r49, -1;
setp.lt.u32 %p107, %r343, %r1;
@%p107 bra $L__BB4_180;
// %bb.127: // %if.then40.i55.i
// in Loop: Header=BB4_15 Depth=1
$L__tmp253:
.loc 1 799 32 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:32
ld.shared.u64 %rd154, [psiMinv_cols6_shared_$_0];
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
setp.eq.s64 %p108, %rd154, 0;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p108 bra $L__BB4_180;
// %bb.128: // %omp.precond.then.i2.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r50, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p109, %r50, 1;
mov.u32 %r482, 1;
@%p109 bra $L__BB4_131;
// %bb.129: // %cond.false.i.i.i16.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r482, [_ZN4ompx5state9TeamStateE_$_2];
setp.ne.s32 %p110, %r482, 0;
@%p110 bra $L__BB4_131;
// %bb.130: // %cond.false.i.i.i.i18.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r345, [IsSPMDMode];
setp.eq.s32 %p111, %r345, 0;
selp.b32 %r346, -32, 0, %p111;
add.s32 %r482, %r346, %r2;
$L__BB4_131: // %__kmpc_for_static_init_8u.exit.i4.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
add.s64 %rd155, %rd154, -1;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
ld.shared.u32 %r51, [_ZN4ompx5state9TeamStateE_$_1];
setp.eq.s32 %p112, %r50, 0;
setp.eq.s32 %p113, %r51, %r50;
selp.b32 %r347, %r1, 0, %p113;
selp.b32 %r348, 0, %r347, %p112;
cvt.s64.s32 %rd511, %r348;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
setp.lt.u64 %p114, %rd155, %rd511;
mov.f64 %fd283, 0d0000000000000000;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p114 bra $L__BB4_136;
// %bb.132: // %omp.inner.for.cond.preheader.lr.ph.i.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
cvt.s64.s32 %rd157, %r482;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
add.s64 %rd510, %rd511, 1;
$L__tmp254:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
ld.shared.u64 %rd386, [psiMinv_temp_list_devptr5_shared_$_0];
ld.shared.u64 %rd387, [iw_shared5_$_0];
shl.b64 %rd388, %rd387, 3;
add.s64 %rd389, %rd386, %rd388;
ld.u64 %rd390, [%rd389];
ld.shared.u32 %r349, [WorkingIndex7_shared7_$_0];
ld.shared.u64 %rd391, [psiV_temp_list_ptr3_shared4_$_0];
add.s64 %rd392, %rd391, %rd388;
ld.u64 %rd393, [%rd392];
$L__tmp255:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
shl.b64 %rd394, %rd511, 3;
add.s64 %rd509, %rd393, %rd394;
shl.b64 %rd160, %rd157, 3;
shl.b64 %rd161, %rd154, 3;
mul.lo.s64 %rd395, %rd161, %rd511;
mul.wide.s32 %rd396, %r349, 8;
add.s64 %rd397, %rd395, %rd396;
add.s64 %rd508, %rd390, %rd397;
mul.lo.s64 %rd163, %rd161, %rd157;
mov.f64 %fd283, 0d0000000000000000;
mov.u64 %rd512, %rd511;
$L__BB4_133: // %omp.inner.for.cond.preheader.i.i.i
// Parent Loop BB4_15 Depth=1
// => This Loop Header: Depth=2
// Child Loop BB4_134 Depth 3
// Outer (chunk) loop. Seeds the inner-loop cursors from the per-chunk state:
//   %rd513 <- %rd508  (address of the first operand)
//   %rd514 <- %rd509  (address of the second operand)
//   %rd515 <- %rd512  (iteration counter)
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u64 %rd513, %rd508;
mov.u64 %rd514, %rd509;
mov.u64 %rd515, %rd512;
$L__BB4_134: // %omp.inner.for.body.i13.i.i
// Parent Loop BB4_15 Depth=1
// Parent Loop BB4_133 Depth=2
// => This Inner Loop Header: Depth=3
// Inner loop body (source line 802): %fd283 = [%rd514] * [%rd513] + %fd283,
// using generic-address f64 loads and a fused multiply-add.
.loc 1 802 20 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:802:20
ld.f64 %fd204, [%rd513];
.loc 1 802 72 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:802:72
ld.f64 %fd205, [%rd514];
.loc 1 802 17 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:802:17
fma.rn.f64 %fd283, %fd205, %fd204, %fd283;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
// Advance: counter by 1, second operand by 8 bytes, first operand by %rd161
// bytes (%rd161 is set before the loop to 8 * the value loaded from
// psiMinv_cols6_shared_$_0). Continue while counter < %rd510 (unsigned).
add.s64 %rd515, %rd515, 1;
add.s64 %rd514, %rd514, 8;
add.s64 %rd513, %rd513, %rd161;
setp.lt.u64 %p115, %rd515, %rd510;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p115 bra $L__BB4_134;
// %bb.135: // %omp.dispatch.inc.i.i.i
// in Loop: Header=BB4_133 Depth=2
// Move to the next chunk: the start index %rd512 advances by the stride %rd157,
// the chunk's last index %rd511 advances by the same stride but is clamped to
// %rd155, and %rd510 = %rd511 + 1 is the new exclusive bound.
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
add.s64 %rd512, %rd512, %rd157;
add.s64 %rd398, %rd511, %rd157;
.loc 1 799 12 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:12
min.u64 %rd511, %rd398, %rd155;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
add.s64 %rd510, %rd511, 1;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
// Base addresses advance by the precomputed byte strides %rd160 and %rd163.
add.s64 %rd509, %rd509, %rd160;
add.s64 %rd508, %rd508, %rd163;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
// Another chunk is processed only if the new start index is below the bound.
setp.lt.u64 %p116, %rd512, %rd510;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p116 bra $L__BB4_133;
$L__BB4_136: // %omp.dispatch.end.i.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u32 %r483, 1;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p109 bra $L__BB4_139;
// %bb.137: // %cond.false.i.i13.i.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r483, [_ZN4ompx5state9TeamStateE_$_2];
setp.ne.s32 %p120, %r483, 0;
@%p120 bra $L__BB4_139;
// %bb.138: // %cond.false.i.i.i14.i.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r351, [IsSPMDMode];
setp.eq.s32 %p121, %r351, 0;
selp.b32 %r352, -32, 0, %p121;
add.s32 %r483, %r352, %r2;
$L__BB4_139: // %omp_get_num_threads.exit.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
setp.eq.s32 %p123, %r483, 1;
mov.pred %p208, 0;
@%p123 bra $L__BB4_178;
// %bb.140: // %if.end5.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
add.s32 %r59, %r483, 31;
shr.u32 %r487, %r59, 5;
and.b32 %r485, %r483, 31;
setp.ne.s32 %p124, %r485, 0;
add.s32 %r353, %r487, -1;
setp.ge.u32 %p125, %r4, %r353;
and.pred %p126, %p124, %p125;
@%p126 bra $L__BB4_142;
// %bb.141: // %for.body.i.i.preheader.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
$L__tmp256:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd401, %fd283;
// begin inline asm
mov.b64 {%r359,%r360}, %rd401;
// end inline asm
shfl.sync.down.b32 %r362, %r360, 16, 31, -1;
shfl.sync.down.b32 %r361, %r359, 16, 31, -1;
// begin inline asm
mov.b64 %rd402, {%r361,%r362};
// end inline asm
$L__tmp257:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %fd208, %rd402;
$L__tmp258:
add.f64 %fd209, %fd283, %fd208;
$L__tmp259:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd403, %fd209;
// begin inline asm
mov.b64 {%r363,%r364}, %rd403;
// end inline asm
shfl.sync.down.b32 %r366, %r364, 8, 31, -1;
shfl.sync.down.b32 %r365, %r363, 8, 31, -1;
// begin inline asm
mov.b64 %rd404, {%r365,%r366};
// end inline asm
mov.b64 %fd210, %rd404;
$L__tmp260:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd211, %fd209, %fd210;
$L__tmp261:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd405, %fd211;
// begin inline asm
mov.b64 {%r367,%r368}, %rd405;
// end inline asm
shfl.sync.down.b32 %r370, %r368, 4, 31, -1;
shfl.sync.down.b32 %r369, %r367, 4, 31, -1;
// begin inline asm
mov.b64 %rd406, {%r369,%r370};
// end inline asm
mov.b64 %fd212, %rd406;
$L__tmp262:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd213, %fd211, %fd212;
$L__tmp263:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd407, %fd213;
// begin inline asm
mov.b64 {%r371,%r372}, %rd407;
// end inline asm
shfl.sync.down.b32 %r374, %r372, 2, 31, -1;
shfl.sync.down.b32 %r373, %r371, 2, 31, -1;
// begin inline asm
mov.b64 %rd408, {%r373,%r374};
// end inline asm
mov.b64 %fd214, %rd408;
$L__tmp264:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd215, %fd213, %fd214;
$L__tmp265:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd409, %fd215;
// begin inline asm
mov.b64 {%r375,%r376}, %rd409;
// end inline asm
shfl.sync.down.b32 %r378, %r376, 1, 31, -1;
shfl.sync.down.b32 %r377, %r375, 1, 31, -1;
// begin inline asm
mov.b64 %rd410, {%r377,%r378};
// end inline asm
mov.b64 %fd216, %rd410;
$L__tmp266:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd283, %fd215, %fd216;
$L__tmp267:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
bra.uni $L__BB4_145;
$L__tmp268:
$L__BB4_183: // %if.end45.i74.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.gt.u32 %p162, %r48, 1;
@%p162 bra $L__BB4_226;
bra.uni $L__BB4_184;
$L__BB4_226: // %__kmpc_begin_sharing_variables.exit.i78.i
// in Loop: Header=BB4_15 Depth=1
// Sets the TeamState fields (_2 = %r49, _1 = 1, _0 = 1), executes barrier 8
// twice, then rewrites the fields (_0 = %r471, _1 = %r471, _2 = 1).
// NOTE(review): presumably the first barrier releases the other threads to run
// the region and the second waits for them to finish - confirm against the
// OpenMP device runtime. %r471 is defined outside this excerpt.
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r49;
mov.u32 %r468, 1;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r468;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r468;
// Barrier resource number 8; no thread-count operand is given.
mov.u32 %r466, 8;
// begin inline asm
barrier.sync %r466;
// end inline asm
// begin inline asm
barrier.sync %r466;
// end inline asm
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r471;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r471;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r468;
bra.uni $L__BB4_227;
$L__BB4_184: // %if.else.i.i4468.i76.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u32 %r407, 1;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r407;
$L__tmp269:
.loc 1 799 32 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:32
ld.shared.u64 %rd183, [psiMinv_cols6_shared_$_0];
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
setp.eq.s64 %p163, %rd183, 0;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p163 bra $L__BB4_225;
// %bb.185: // %omp.precond.then.i5278.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
add.s64 %rd184, %rd183, -1;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
ld.shared.u32 %r408, [_ZN4ompx5state9TeamStateE_$_1];
ld.shared.u32 %r409, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p164, %r409, 0;
selp.b32 %r410, %r6, %r409, %p164;
setp.eq.s32 %p165, %r408, 1;
selp.b32 %r411, %r1, 0, %p165;
cvt.s64.s32 %rd519, %r411;
cvt.s64.s32 %rd186, %r410;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
setp.lt.u64 %p166, %rd184, %rd519;
mov.f64 %fd293, 0d0000000000000000;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p166 bra $L__BB4_190;
// %bb.186: // %omp.inner.for.cond.preheader.lr.ph.i5289.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
add.s64 %rd518, %rd519, 1;
$L__tmp270:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
ld.shared.u64 %rd419, [psiMinv_temp_list_devptr5_shared_$_0];
ld.shared.u64 %rd420, [iw_shared5_$_0];
shl.b64 %rd421, %rd420, 3;
add.s64 %rd422, %rd419, %rd421;
ld.u64 %rd423, [%rd422];
ld.shared.u32 %r412, [WorkingIndex7_shared7_$_0];
ld.shared.u64 %rd424, [psiV_temp_list_ptr3_shared4_$_0];
add.s64 %rd425, %rd424, %rd421;
ld.u64 %rd426, [%rd425];
$L__tmp271:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
shl.b64 %rd427, %rd519, 3;
add.s64 %rd517, %rd426, %rd427;
shl.b64 %rd189, %rd186, 3;
shl.b64 %rd190, %rd183, 3;
mul.lo.s64 %rd428, %rd190, %rd519;
mul.wide.s32 %rd429, %r412, 8;
add.s64 %rd430, %rd428, %rd429;
add.s64 %rd516, %rd423, %rd430;
mul.lo.s64 %rd192, %rd190, %rd186;
mov.f64 %fd293, 0d0000000000000000;
mov.u64 %rd520, %rd519;
$L__BB4_187: // %omp.inner.for.cond.preheader.i5295.i.i
// Parent Loop BB4_15 Depth=1
// => This Loop Header: Depth=2
// Child Loop BB4_188 Depth 3
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u64 %rd521, %rd516;
mov.u64 %rd522, %rd517;
mov.u64 %rd523, %rd520;
$L__BB4_188: // %omp.inner.for.body.i5300.i.i
// Parent Loop BB4_15 Depth=1
// Parent Loop BB4_187 Depth=2
// => This Inner Loop Header: Depth=3
.loc 1 802 20 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:802:20
ld.f64 %fd223, [%rd521];
.loc 1 802 72 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:802:72
ld.f64 %fd224, [%rd522];
.loc 1 802 17 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:802:17
fma.rn.f64 %fd293, %fd224, %fd223, %fd293;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
add.s64 %rd523, %rd523, 1;
add.s64 %rd522, %rd522, 8;
add.s64 %rd521, %rd521, %rd190;
setp.lt.u64 %p167, %rd523, %rd518;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p167 bra $L__BB4_188;
// %bb.189: // %omp.dispatch.inc.i5310.i.i
// in Loop: Header=BB4_187 Depth=2
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
add.s64 %rd520, %rd520, %rd186;
add.s64 %rd431, %rd519, %rd186;
.loc 1 799 12 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:12
min.u64 %rd519, %rd431, %rd184;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
add.s64 %rd518, %rd519, 1;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
add.s64 %rd517, %rd517, %rd189;
add.s64 %rd516, %rd516, %rd192;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
setp.lt.u64 %p168, %rd520, %rd518;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p168 bra $L__BB4_187;
$L__BB4_190: // %omp.dispatch.end.i5316.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
cvt.u32.u64 %r413, %rd186;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.eq.s32 %p170, %r413, 1;
mov.pred %p209, %p169;
@%p170 bra $L__BB4_223;
// %bb.191: // %if.end5.i.i.i5326.i.i
// in Loop: Header=BB4_15 Depth=1
add.s32 %r77, %r413, 31;
shr.u32 %r491, %r77, 5;
and.b32 %r489, %r413, 31;
setp.ne.s32 %p171, %r489, 0;
add.s32 %r415, %r491, -1;
setp.ge.u32 %p172, %r4, %r415;
and.pred %p173, %p171, %p172;
@%p173 bra $L__BB4_193;
// %bb.192: // %for.body.i.i.preheader.i.i5467.i.i
// in Loop: Header=BB4_15 Depth=1
$L__tmp272:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd434, %fd293;
// begin inline asm
mov.b64 {%r421,%r422}, %rd434;
// end inline asm
shfl.sync.down.b32 %r424, %r422, 16, 31, -1;
shfl.sync.down.b32 %r423, %r421, 16, 31, -1;
// begin inline asm
mov.b64 %rd435, {%r423,%r424};
// end inline asm
$L__tmp273:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %fd227, %rd435;
$L__tmp274:
add.f64 %fd228, %fd293, %fd227;
$L__tmp275:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd436, %fd228;
// begin inline asm
mov.b64 {%r425,%r426}, %rd436;
// end inline asm
shfl.sync.down.b32 %r428, %r426, 8, 31, -1;
shfl.sync.down.b32 %r427, %r425, 8, 31, -1;
// begin inline asm
mov.b64 %rd437, {%r427,%r428};
// end inline asm
mov.b64 %fd229, %rd437;
$L__tmp276:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd230, %fd228, %fd229;
$L__tmp277:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd438, %fd230;
// begin inline asm
mov.b64 {%r429,%r430}, %rd438;
// end inline asm
shfl.sync.down.b32 %r432, %r430, 4, 31, -1;
shfl.sync.down.b32 %r431, %r429, 4, 31, -1;
// begin inline asm
mov.b64 %rd439, {%r431,%r432};
// end inline asm
mov.b64 %fd231, %rd439;
$L__tmp278:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd232, %fd230, %fd231;
$L__tmp279:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd440, %fd232;
// begin inline asm
mov.b64 {%r433,%r434}, %rd440;
// end inline asm
shfl.sync.down.b32 %r436, %r434, 2, 31, -1;
shfl.sync.down.b32 %r435, %r433, 2, 31, -1;
// begin inline asm
mov.b64 %rd441, {%r435,%r436};
// end inline asm
mov.b64 %fd233, %rd441;
$L__tmp280:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd234, %fd232, %fd233;
$L__tmp281:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd442, %fd234;
// begin inline asm
mov.b64 {%r437,%r438}, %rd442;
// end inline asm
shfl.sync.down.b32 %r440, %r438, 1, 31, -1;
shfl.sync.down.b32 %r439, %r437, 1, 31, -1;
// begin inline asm
mov.b64 %rd443, {%r439,%r440};
// end inline asm
mov.b64 %fd235, %rd443;
$L__tmp282:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd293, %fd234, %fd235;
$L__tmp283:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
bra.uni $L__BB4_196;
$L__BB4_142: // %if.then15.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
// Shuffle reduction of %fd283 with a run-time distance. %r485 is the running
// count (initialised earlier to %r483 & 31); nothing is done when it is < 2.
setp.lt.u32 %p127, %r485, 2;
@%p127 bra $L__BB4_145;
// %bb.143: // %while.body.lr.ph.i.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
// First shuffle distance is count / 2.
shr.u32 %r484, %r485, 1;
$L__BB4_144: // %while.body.i.i.i.i.i.i
// Parent Loop BB4_15 Depth=1
// => This Inner Loop Header: Depth=2
// %rs9 = low 16 bits of the current distance, used for the lane comparison below.
cvt.u16.u32 %rs9, %r484;
$L__tmp284:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %rd399, %fd283;
$L__tmp285:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// Split the double into two 32-bit halves, shuffle each half down by %r484
// lanes (clamp operand 31, member mask -1) and reassemble into %rd400.
// begin inline asm
mov.b64 {%r354,%r355}, %rd399;
// end inline asm
shfl.sync.down.b32 %r357, %r355, %r484, 31, -1;
shfl.sync.down.b32 %r356, %r354, %r484, 31, -1;
// begin inline asm
mov.b64 %rd400, {%r356,%r357};
// end inline asm
// Branch-free combine: when %rs2 < distance the thread adds its own value to
// the shuffled one; otherwise it adds 0d8000000000000000 (negative zero), i.e.
// it takes the shuffled value as-is.
// NOTE(review): %rs2 is set outside this excerpt; presumably the lane id - confirm.
setp.lt.u16 %p128, %rs2, %rs9;
mov.b64 %fd206, %rd400;
selp.f64 %fd207, %fd283, 0d8000000000000000, %p128;
add.f64 %fd283, %fd207, %fd206;
$L__tmp286:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
// Next round: count = (count + 1) >> 1, distance = (count + 1) >> 2, computed
// from the old count; repeat while the old count was > 2 (unsigned).
add.s32 %r358, %r485, 1;
shr.u32 %r65, %r358, 1;
shr.u32 %r484, %r358, 2;
setp.gt.u32 %p129, %r485, 2;
mov.u32 %r485, %r65;
@%p129 bra $L__BB4_144;
$L__BB4_145: // %if.end22.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
setp.eq.s32 %p119, %r1, %r3;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
and.pred %p2, %p119, %p112;
selp.b32 %r55, 0, %r1, %p2;
setp.lt.u32 %p130, %r483, 33;
@%p130 bra $L__BB4_177;
// %bb.146: // %if.then25.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
$L__tmp287:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
ld.shared.u32 %r379, [IsSPMDMode];
setp.ne.s32 %p131, %r379, 0;
@%p131 bra $L__BB4_149;
bra.uni $L__BB4_147;
$L__BB4_149: // %if.then2.i.i53.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
bra.uni $L__BB4_151;
$L__tmp288:
$L__BB4_193: // %if.then15.i.i.i5338.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.lt.u32 %p174, %r489, 2;
@%p174 bra $L__BB4_196;
// %bb.194: // %while.body.lr.ph.i.i.i.i5340.i.i
// in Loop: Header=BB4_15 Depth=1
shr.u32 %r488, %r489, 1;
$L__BB4_195: // %while.body.i.i.i.i5343.i.i
// Parent Loop BB4_15 Depth=1
// => This Inner Loop Header: Depth=2
cvt.u16.u32 %rs11, %r488;
$L__tmp289:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %rd432, %fd293;
$L__tmp290:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// begin inline asm
mov.b64 {%r416,%r417}, %rd432;
// end inline asm
shfl.sync.down.b32 %r419, %r417, %r488, 31, -1;
shfl.sync.down.b32 %r418, %r416, %r488, 31, -1;
// begin inline asm
mov.b64 %rd433, {%r418,%r419};
// end inline asm
setp.lt.u16 %p175, %rs2, %rs11;
mov.b64 %fd225, %rd433;
selp.f64 %fd226, %fd293, 0d8000000000000000, %p175;
add.f64 %fd293, %fd226, %fd225;
$L__tmp291:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
add.s32 %r420, %r489, 1;
shr.u32 %r83, %r420, 1;
shr.u32 %r488, %r420, 2;
setp.gt.u32 %p176, %r489, 2;
mov.u32 %r489, %r83;
@%p176 bra $L__BB4_195;
$L__BB4_196: // %if.end22.i.i.i5356.i.i
// in Loop: Header=BB4_15 Depth=1
setp.lt.u32 %p177, %r413, 33;
mov.pred %p209, %p6;
@%p177 bra $L__BB4_223;
// %bb.197: // %if.then25.i.i.i5367.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u32 %r442, 7;
$L__tmp292:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// begin inline asm
barrier.sync %r442, %r413;
// end inline asm
mov.b64 %rd209, %fd293;
@%p87 bra $L__BB4_199;
// %bb.198: // %then.i72.i.i5457.i.i
// in Loop: Header=BB4_15 Depth=1
cvt.u32.u64 %r85, %rd209;
st.volatile.shared.u32 [%rd21], %r85;
$L__BB4_199: // %ifcont.i55.i.i5376.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r444, [IsSPMDMode];
setp.ne.s32 %p179, %r444, 0;
@%p179 bra $L__BB4_202;
bra.uni $L__BB4_200;
$L__BB4_202: // %if.then2.i2.i57.i.i5378.i.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
bra.uni $L__BB4_204;
$L__BB4_147: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i.i73.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
@!%p2 bra $L__BB4_150;
bra.uni $L__BB4_148;
$L__BB4_148: // %if.then.i.i83.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
membar.gl;
bra.uni $L__BB4_151;
$L__BB4_200: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i3.i61.i.i5446.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r86, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p181, %r86, 0;
or.pred %p182, %p82, %p181;
@%p182 bra $L__BB4_203;
// %bb.201: // %if.then.i17.i71.i.i5456.i.i
// in Loop: Header=BB4_15 Depth=1
membar.gl;
bra.uni $L__BB4_204;
$L__tmp293:
$L__BB4_150: // %if.else.i.i.i.i76.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.eq.s32 %p132, %r50, 1;
$L__tmp294:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
ld.shared.u32 %r382, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p133, %r382, 0;
selp.b32 %r383, %r6, %r382, %p133;
selp.b32 %r381, %r383, 1, %p132;
mov.u32 %r380, 7;
// begin inline asm
barrier.sync %r380, %r381;
// end inline asm
$L__BB4_151: // %__kmpc_barrier.exit.i54.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
mov.b64 %rd180, %fd283;
@%p87 bra $L__BB4_153;
// %bb.152: // %then.i72.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
cvt.u32.u64 %r67, %rd180;
st.volatile.shared.u32 [%rd21], %r67;
// Synchronisation between the preceding st.volatile.shared and the following
// ld.volatile.shared. The surrounding LLVM block names (__kmpc_barrier.exit*)
// indicate this is an inlined __kmpc_barrier.
$L__BB4_153: // %ifcont.i55.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r384, [IsSPMDMode];
setp.ne.s32 %p135, %r384, 0;
// IsSPMDMode != 0: take the plain barrier 0 path.
@%p135 bra $L__BB4_156;
bra.uni $L__BB4_154;
$L__BB4_156: // %if.then2.i2.i57.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
bra.uni $L__BB4_158;
$L__BB4_154: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i3.i61.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
// IsSPMDMode == 0: the choice depends on TeamState slot 0 and on %p82
// (%p82 is computed outside this excerpt).
ld.shared.u32 %r68, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p137, %r68, 0;
or.pred %p138, %p82, %p137;
@%p138 bra $L__BB4_157;
// %bb.155: // %if.then.i17.i71.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
// Slot 0 is zero and %p82 is clear: only a memory fence is issued, no thread barrier.
membar.gl;
bra.uni $L__BB4_158;
$L__BB4_157: // %if.else.i.i.i9.i64.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
// Named barrier 7 with an explicit participant count in %r386:
//   slot 0 == 1 -> TeamState slot 2, or %r6 when slot 2 is zero
//   otherwise   -> 1
// NOTE(review): the PTX ISA asks for an explicit barrier thread count to be a
// multiple of the warp size; the count is 1 on the "otherwise" path - confirm
// whether that path is reachable here.
setp.eq.s32 %p139, %r68, 1;
ld.shared.u32 %r387, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p140, %r387, 0;
selp.b32 %r388, %r6, %r387, %p140;
selp.b32 %r386, %r388, 1, %p139;
mov.u32 %r385, 7;
// begin inline asm
barrier.sync %r385, %r386;
// end inline asm
$L__tmp295:
// Read back the staged low word: threads with %r1 < %r487 replace the low
// 32 bits of their partial with the word found at [%rd22].
// %r1 is presumably the thread id; it, %r487 and %rd22 are set outside this excerpt.
$L__BB4_158: // %__kmpc_barrier.exit18.i58.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
// %p141 is tested again further down to guard the matching high-word read.
setp.ge.u32 %p141, %r1, %r487;
$L__tmp296:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
@%p141 bra $L__BB4_160;
// %bb.159: // %then2.i60.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
// -4294967296 is 0xFFFFFFFF00000000: keep the high word of the old bit pattern.
and.b64 %rd181, %rd180, -4294967296;
// The 32-bit load is zero-extended into the 64-bit register.
ld.volatile.shared.u32 %rd411, [%rd22];
or.b64 %rd412, %rd181, %rd411;
mov.b64 %fd283, %rd412;
$L__BB4_160: // %ifcont4.i59.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r389, [IsSPMDMode];
setp.eq.s32 %p142, %r389, 0;
@%p142 bra $L__BB4_162;
// %bb.161: // %if.then2.i.1.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
bra.uni $L__BB4_165;
$L__BB4_162: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i.1.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r69, [_ZN4ompx5state9TeamStateE_$_0];
setp.eq.s32 %p144, %r69, 0;
and.pred %p145, %p119, %p144;
@%p145 bra $L__BB4_164;
bra.uni $L__BB4_163;
$L__BB4_164: // %if.then.i.1.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
membar.gl;
bra.uni $L__BB4_165;
$L__BB4_163: // %if.else.i.i.i.1.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
setp.eq.s32 %p146, %r69, 1;
ld.shared.u32 %r392, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p147, %r392, 0;
selp.b32 %r393, %r6, %r392, %p147;
selp.b32 %r391, %r393, 1, %p146;
mov.u32 %r390, 7;
// begin inline asm
barrier.sync %r390, %r391;
// end inline asm
// Stage the high 32 bits of the f64 partial (%fd283) in shared memory;
// this mirrors the earlier low-word store to the same address %rd21.
$L__BB4_165: // %__kmpc_barrier.exit.1.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
mov.b64 %rd182, %fd283;
// Threads with %p87 set skip the store (%p87 is computed outside this excerpt).
@%p87 bra $L__BB4_167;
// %bb.166: // %then.1.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
// Unpack {low, high}: the low word goes to the scratch register, %r70 receives the high word.
{ .reg .b32 tmp; mov.b64 {tmp, %r70}, %rd182; }
st.volatile.shared.u32 [%rd21], %r70;
$L__BB4_167: // %ifcont.1.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r394, [IsSPMDMode];
setp.eq.s32 %p149, %r394, 0;
@%p149 bra $L__BB4_169;
// %bb.168: // %if.then2.i2.1.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
bra.uni $L__BB4_172;
$L__BB4_169: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i3.1.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r71, [_ZN4ompx5state9TeamStateE_$_0];
setp.eq.s32 %p151, %r71, 0;
and.pred %p152, %p119, %p151;
@%p152 bra $L__BB4_171;
bra.uni $L__BB4_170;
$L__BB4_171: // %if.then.i17.1.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
membar.gl;
bra.uni $L__BB4_172;
$L__BB4_170: // %if.else.i.i.i9.1.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
setp.eq.s32 %p153, %r71, 1;
ld.shared.u32 %r397, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p154, %r397, 0;
selp.b32 %r398, %r6, %r397, %p154;
selp.b32 %r396, %r398, 1, %p153;
mov.u32 %r395, 7;
// begin inline asm
barrier.sync %r395, %r396;
// end inline asm
// Read back the staged high word: threads for which %p141 is clear
// (%r1 < %r487, computed earlier) replace the high 32 bits of their partial.
$L__BB4_172: // %__kmpc_barrier.exit18.1.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
@%p141 bra $L__BB4_174;
$L__tmp297:
// %bb.173: // %_omp_reduction_inter_warp_copy_func2.exit.sink.split.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
// The 32-bit load is zero-extended, then shifted into the upper half.
ld.volatile.shared.u32 %rd413, [%rd22];
shl.b64 %rd414, %rd413, 32;
// 4294967295 is 0x00000000FFFFFFFF: keep the low word captured in %rd182.
and.b64 %rd415, %rd182, 4294967295;
or.b64 %rd416, %rd414, %rd415;
mov.b64 %fd283, %rd416;
$L__tmp298:
$L__BB4_174: // %_omp_reduction_inter_warp_copy_func2.exit.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
setp.gt.u32 %p156, %r1, 31;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.lt.u32 %p157, %r59, 64;
or.pred %p158, %p156, %p157;
@%p158 bra $L__BB4_177;
// %bb.175: // %while.body.lr.ph.i49.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
shr.u32 %r486, %r59, 6;
cvt.u16.u32 %rs4, %r55;
// One pass of a shuffle-down reduction over the f64 partial in %fd283.
// %r486 is the lane offset for this pass; %r487 is the loop-control count,
// halved (rounding up) on every pass. Its initial value is set outside this excerpt.
$L__BB4_176: // %while.body.i52.i.i.i.i.i
// Parent Loop BB4_15 Depth=1
// => This Inner Loop Header: Depth=2
cvt.u16.u32 %rs10, %r486;
$L__tmp299:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %rd417, %fd283;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// shfl moves 32 bits at a time, so the value is split into low (%r399) and high (%r400) words.
// begin inline asm
mov.b64 {%r399,%r400}, %rd417;
// end inline asm
cvt.s32.s16 %r403, %r486;
// Fetch both words from the lane %r403 positions higher; member mask -1 names all lanes.
shfl.sync.down.b32 %r402, %r400, %r403, 31, -1;
shfl.sync.down.b32 %r401, %r399, %r403, 31, -1;
// begin inline asm
mov.b64 %rd418, {%r401,%r402};
// end inline asm
// %rs4 is %r55 truncated to 16 bits just before the loop (presumably a lane or
// warp-relative id - confirm against the definition of %r55).
setp.lt.u16 %p159, %rs4, %rs10;
mov.b64 %fd217, %rd418;
// 0d8000000000000000 is -0.0. When %rs4 >= offset the thread contributes -0.0
// instead of its own partial, so the add simply takes over the shuffled value.
selp.f64 %fd218, %fd283, 0d8000000000000000, %p159;
add.f64 %fd283, %fd218, %fd217;
$L__tmp300:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
// Next pass: count = (count + 1) >> 1 and offset = (count + 1) >> 2, using the old count.
// The loop repeats while the old count was greater than 2.
add.s32 %r404, %r487, 1;
shr.u32 %r75, %r404, 1;
shr.u32 %r486, %r404, 2;
setp.gt.u32 %p160, %r487, 2;
mov.u32 %r487, %r75;
@%p160 bra $L__BB4_176;
// Tail of the inlined parallel reduction: fold the reduced partial (%fd283)
// into the shared accumulator c_ratio_shared8, then write the TeamState slots.
$L__BB4_177: // %if.end29.i.i.i.i.i
// in Loop: Header=BB4_15 Depth=1
// On this entry path only threads with %r55 == 0 perform the accumulation below.
setp.ne.s32 %p208, %r55, 0;
$L__BB4_178: // %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i.i.i
// in Loop: Header=BB4_15 Depth=1
@%p208 bra $L__BB4_180;
// %bb.179: // %.omp.reduction.then.i.i.i
// in Loop: Header=BB4_15 Depth=1
// Plain (non-atomic) load/add/store on shared memory.
// NOTE(review): presumably a single thread reaches this point with %p208 clear - confirm.
.loc 1 798 40 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:40
ld.shared.f64 %fd219, [c_ratio_shared8_$_0];
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd220, %fd219, %fd283;
st.shared.f64 [c_ratio_shared8_$_0], %fd220;
$L__tmp301:
$L__BB4_180: // %if.end41.i60.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
// Barrier 0 after the accumulation and before the TeamState slots are rewritten.
bar.sync 0;
// Threads with %p106 set skip the state write (%p106 is computed outside this excerpt).
@%p106 bra $L__BB4_182;
// %bb.181: // %if.then.i.i.i66.i
// in Loop: Header=BB4_15 Depth=1
// Slots 0 and 1 receive %r471 (defined outside this excerpt); slot 2 receives 1.
// The next label name points at an inlined ompx::state::ValueRAII destructor.
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r471;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r471;
mov.u32 %r406, 1;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r406;
$L__BB4_182: // %_ZN4ompx5state9ValueRAIIINS0_5ValueIjLNS0_9ValueKindE7EEEjED1Ev.exit.i61.i
// in Loop: Header=BB4_15 Depth=1
// Two consecutive barrier-0 syncs, then continue at the result store.
bar.sync 0;
bar.sync 0;
bra.uni $L__BB4_227;
$L__BB4_203: // %if.else.i.i.i9.i64.i.i5449.i.i
// in Loop: Header=BB4_15 Depth=1
$L__tmp302:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
setp.eq.s32 %p183, %r86, 1;
ld.shared.u32 %r447, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p184, %r447, 0;
selp.b32 %r448, %r6, %r447, %p184;
selp.b32 %r446, %r448, 1, %p183;
// begin inline asm
barrier.sync %r442, %r446;
// end inline asm
$L__tmp303:
$L__BB4_204: // %__kmpc_barrier.exit18.i58.i.i5379.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.ge.u32 %p185, %r1, %r491;
$L__tmp304:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
@%p185 bra $L__BB4_206;
// %bb.205: // %then2.i60.i.i5442.i.i
// in Loop: Header=BB4_15 Depth=1
and.b64 %rd210, %rd209, -4294967296;
ld.volatile.shared.u32 %rd444, [%rd22];
or.b64 %rd445, %rd210, %rd444;
mov.b64 %fd293, %rd445;
$L__BB4_206: // %ifcont4.i59.i.i5380.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r449, [IsSPMDMode];
setp.eq.s32 %p186, %r449, 0;
@%p186 bra $L__BB4_208;
// %bb.207: // %if.then2.i.1.i.i.i5383.i.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
bra.uni $L__BB4_211;
$L__BB4_208: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i.1.i.i.i5431.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:48
setp.eq.s32 %p187, %r1, %r3;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
ld.shared.u32 %r87, [_ZN4ompx5state9TeamStateE_$_0];
setp.eq.s32 %p188, %r87, 0;
and.pred %p189, %p187, %p188;
@%p189 bra $L__BB4_210;
bra.uni $L__BB4_209;
$L__BB4_210: // %if.then.i.1.i.i.i5441.i.i
// in Loop: Header=BB4_15 Depth=1
membar.gl;
bra.uni $L__BB4_211;
$L__BB4_209: // %if.else.i.i.i.1.i.i.i5434.i.i
// in Loop: Header=BB4_15 Depth=1
setp.eq.s32 %p190, %r87, 1;
ld.shared.u32 %r452, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p191, %r452, 0;
selp.b32 %r453, %r6, %r452, %p191;
selp.b32 %r451, %r453, 1, %p190;
// begin inline asm
barrier.sync %r442, %r451;
// end inline asm
$L__BB4_211: // %__kmpc_barrier.exit.1.i.i.i5384.i.i
// in Loop: Header=BB4_15 Depth=1
mov.b64 %rd211, %fd293;
@%p87 bra $L__BB4_213;
// %bb.212: // %then.1.i.i.i5428.i.i
// in Loop: Header=BB4_15 Depth=1
{ .reg .b32 tmp; mov.b64 {tmp, %r88}, %rd211; }
st.volatile.shared.u32 [%rd21], %r88;
$L__BB4_213: // %ifcont.1.i.i.i5385.i.i
// in Loop: Header=BB4_15 Depth=1
ld.shared.u32 %r454, [IsSPMDMode];
setp.eq.s32 %p193, %r454, 0;
@%p193 bra $L__BB4_215;
// %bb.214: // %if.then2.i2.1.i.i.i5387.i.i
// in Loop: Header=BB4_15 Depth=1
bar.sync 0;
bra.uni $L__BB4_218;
$L__BB4_215: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i3.1.i.i.i5417.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:48
setp.eq.s32 %p194, %r1, %r3;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
ld.shared.u32 %r89, [_ZN4ompx5state9TeamStateE_$_0];
setp.eq.s32 %p195, %r89, 0;
and.pred %p196, %p194, %p195;
@%p196 bra $L__BB4_217;
bra.uni $L__BB4_216;
$L__BB4_217: // %if.then.i17.1.i.i.i5427.i.i
// in Loop: Header=BB4_15 Depth=1
membar.gl;
bra.uni $L__BB4_218;
$L__BB4_216: // %if.else.i.i.i9.1.i.i.i5420.i.i
// in Loop: Header=BB4_15 Depth=1
setp.eq.s32 %p197, %r89, 1;
ld.shared.u32 %r457, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p198, %r457, 0;
selp.b32 %r458, %r6, %r457, %p198;
selp.b32 %r456, %r458, 1, %p197;
// begin inline asm
barrier.sync %r442, %r456;
// end inline asm
$L__BB4_218: // %__kmpc_barrier.exit18.1.i.i.i5388.i.i
// in Loop: Header=BB4_15 Depth=1
@%p185 bra $L__BB4_220;
$L__tmp305:
// %bb.219: // %_omp_reduction_inter_warp_copy_func2.exit.sink.split.i.i5412.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
ld.volatile.shared.u32 %rd446, [%rd22];
shl.b64 %rd447, %rd446, 32;
and.b64 %rd448, %rd211, 4294967295;
or.b64 %rd449, %rd447, %rd448;
mov.b64 %fd293, %rd449;
$L__tmp306:
$L__BB4_220: // %_omp_reduction_inter_warp_copy_func2.exit.i.i5389.i.i
// in Loop: Header=BB4_15 Depth=1
setp.gt.u32 %p200, %r1, 31;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.lt.u32 %p201, %r77, 64;
or.pred %p202, %p200, %p201;
mov.pred %p209, %p6;
@%p202 bra $L__BB4_223;
// %bb.221: // %while.body.lr.ph.i49.i.i.i5394.i.i
// in Loop: Header=BB4_15 Depth=1
shr.u32 %r490, %r77, 6;
// One pass of a shuffle-down reduction over the f64 partial in %fd293.
// Same shape as the loop at $L__BB4_176, with its own registers: %r490 is the
// lane offset and %r491 the loop-control count (initial value set outside this excerpt).
$L__BB4_222: // %while.body.i52.i.i.i5397.i.i
// Parent Loop BB4_15 Depth=1
// => This Inner Loop Header: Depth=2
cvt.u16.u32 %rs12, %r490;
$L__tmp307:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %rd450, %fd293;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// Split into low (%r459) and high (%r460) words because shfl moves 32 bits at a time.
// begin inline asm
mov.b64 {%r459,%r460}, %rd450;
// end inline asm
cvt.s32.s16 %r463, %r490;
// Fetch both words from the lane %r463 positions higher; member mask -1 names all lanes.
shfl.sync.down.b32 %r462, %r460, %r463, 31, -1;
shfl.sync.down.b32 %r461, %r459, %r463, 31, -1;
// begin inline asm
mov.b64 %rd451, {%r461,%r462};
// end inline asm
// %rs1 is defined outside this excerpt (presumably a lane or warp-relative id - confirm).
setp.lt.u16 %p203, %rs1, %rs12;
mov.b64 %fd236, %rd451;
// 0d8000000000000000 is -0.0. When %rs1 >= offset the thread contributes -0.0
// instead of its own partial, so the add simply takes over the shuffled value.
selp.f64 %fd237, %fd293, 0d8000000000000000, %p203;
add.f64 %fd293, %fd237, %fd236;
$L__tmp308:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
// Next pass: count = (count + 1) >> 1 and offset = (count + 1) >> 2, using the old count.
// The loop repeats while the old count was greater than 2.
add.s32 %r464, %r491, 1;
shr.u32 %r93, %r464, 1;
shr.u32 %r490, %r464, 2;
setp.gt.u32 %p204, %r491, 2;
mov.u32 %r491, %r93;
// %p209 (a copy of %p6) is the predicate tested immediately after the loop.
mov.pred %p209, %p6;
@%p204 bra $L__BB4_222;
$L__BB4_223: // %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i5362.i.i
// in Loop: Header=BB4_15 Depth=1
@%p209 bra $L__BB4_225;
// %bb.224: // %.omp.reduction.then.i5365.i.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 798 40 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:40
ld.shared.f64 %fd238, [c_ratio_shared8_$_0];
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd239, %fd238, %fd293;
st.shared.f64 [c_ratio_shared8_$_0], %fd239;
$L__tmp309:
$L__BB4_225: // %_ZN12_GLOBAL__N_115invokeMicrotaskEiiPvPS0_l.exit4408.i77.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r471;
$L__tmp310:
// Write the reduced value to global memory (source lines 804-805 per .loc).
// Only the thread with %r1 == 0 runs the two guarded stores
// (%r1 is presumably the thread id; it is defined outside this excerpt):
//   [%rd2 + 8*iw] = c_ratio
//   [%rd1 + 8*iw] = 1 / c_ratio
// where iw and c_ratio are read from the shared slots iw_shared5 and c_ratio_shared8.
$L__BB4_227: // %__kmpc_parallel_51.exit88.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
setp.ne.s32 %p205, %r1, 0;
$L__tmp311:
.loc 1 804 29 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:804:29
@%p205 bra $L__BB4_229;
// %bb.228: // %region.guarded17.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 804 25 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:804:25
ld.shared.u64 %rd452, [iw_shared5_$_0];
.loc 1 804 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:804:7
// Shift by 3 scales the index by 8 bytes, the size of one f64 element.
shl.b64 %rd453, %rd452, 3;
add.s64 %rd454, %rd2, %rd453;
.loc 1 804 31 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:804:31
ld.shared.f64 %fd240, [c_ratio_shared8_$_0];
.loc 1 804 29 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:804:29
st.global.f64 [%rd454], %fd240;
$L__BB4_229: // %region.barrier15.i
// in Loop: Header=BB4_15 Depth=1
// Barrier 0 sits between the two guarded stores.
bar.sync 0;
.loc 1 805 33 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:33
@%p205 bra $L__BB4_231;
// %bb.230: // %region.guarded22.i
// in Loop: Header=BB4_15 Depth=1
.loc 1 805 29 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:29
// iw and c_ratio are re-read from shared memory rather than reused from registers.
ld.shared.u64 %rd455, [iw_shared5_$_0];
.loc 1 805 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:7
shl.b64 %rd456, %rd455, 3;
add.s64 %rd457, %rd1, %rd456;
.loc 1 805 50 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:50
ld.shared.f64 %fd241, [c_ratio_shared8_$_0];
.loc 1 805 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:48
// Round-to-nearest reciprocal; there is no zero check on %fd241 before it.
rcp.rn.f64 %fd242, %fd241;
.loc 1 805 33 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:33
st.global.f64 [%rd457], %fd242;
bra.uni $L__BB4_231;
$L__tmp312:
$L__BB4_232: // %__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_omp_outlined.exit
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
ret;
$L__tmp313:
$L__func_end4:
// -- End function
}
//--------------------- .text.__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 --------------------------
.section .text.__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783,"ax",@progbits
.sectionflags @"SHF_BARRIERS=9"
.sectioninfo @"SHI_REGISTERS=96"
.align 128
.text.__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783:
.weak __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783
.type __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783,@function
.size __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783,(.L_x_976 - __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783)
.other __omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783,@"STO_CUDA_ENTRY STV_DEFAULT"
__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783:
// Kernel prologue (SASS). Thread 0 (SR_TID.X == 0) initialises the shared
// runtime state and copies constant-bank words 0x168-0x1a4 (presumably the
// kernel parameters - confirm against the launch ABI) into shared slots.
// R1 = c[0x0][0x28] - 0x18 (presumably the local stack pointer minus a 0x18-byte frame - confirm).
/*0000*/ MOV R1, c[0x0][0x28] ;
/*0010*/ S2R R33, SR_TID.X ;
/*0020*/ BSSY B0, `(.L_x_601) ;
/*0030*/ IADD3 R1, R1, -0x18, RZ ;
// P0 is derived from the constant-bank word at 0x160; threads with P0 clear exit at the end of this prologue.
/*0040*/ ISETP.NE.AND.EX P0, PT, RZ, c[0x0][0x160], PT, !PT ;
// P1 = (tid.x != 0). Mask 0x2 selects P1, so R23 keeps a copy of that predicate.
/*0050*/ ISETP.NE.AND P1, PT, R33, RZ, PT ;
/*0060*/ P2R R23, PR, RZ, 0x2 ;
// Thread 0 only: IsSPMDMode = 1, TeamState slots 0 and 1 = 0, slot 2 = 1.
/*0070*/ @!P1 IMAD.MOV.U32 R0, RZ, RZ, 0x1 ;
/*0080*/ @!P1 STS [`(IsSPMDMode)], R0 ;
/*0090*/ @!P1 STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ;
/*00a0*/ @!P1 STS [`(_ZN4ompx5state9TeamStateE_$_1)], RZ ;
/*00b0*/ @!P1 STS [`(_ZN4ompx5state9TeamStateE_$_2)], R0 ;
/*00c0*/ BAR.SYNC 0x0 ;
/*00d0*/ BAR.SYNC 0x0 ;
// All other threads skip the staging copy below.
/*00e0*/ @P1 BRA `(.L_x_602) ;
// Load register pairs from the constant bank; the stores to shared slots are interleaved with the loads.
/*00f0*/ MOV R2, c[0x0][0x168] ;
/*0100*/ IMAD.MOV.U32 R4, RZ, RZ, c[0x0][0x170] ;
/*0110*/ MOV R3, c[0x0][0x16c] ;
/*0120*/ IMAD.MOV.U32 R6, RZ, RZ, c[0x0][0x178] ;
/*0130*/ MOV R5, c[0x0][0x174] ;
/*0140*/ IMAD.MOV.U32 R8, RZ, RZ, c[0x0][0x180] ;
/*0150*/ MOV R7, c[0x0][0x17c] ;
/*0160*/ IMAD.MOV.U32 R10, RZ, RZ, c[0x0][0x188] ;
/*0170*/ MOV R9, c[0x0][0x184] ;
/*0180*/ STS.64 [`($__NumPtcls1_shared2_$_0__2691)], R2 ;
/*0190*/ MOV R11, c[0x0][0x18c] ;
/*01a0*/ IMAD.MOV.U32 R12, RZ, RZ, c[0x0][0x190] ;
/*01b0*/ MOV R13, c[0x0][0x194] ;
/*01c0*/ STS.64 [`($__confgListOccup_ptr2_shared3_$_0__2693)], R4 ;
// 0x198 is loaded as a single 32-bit word and later stored with a 32-bit STS (WorkingIndex7).
/*01d0*/ IMAD.MOV.U32 R0, RZ, RZ, c[0x0][0x198] ;
/*01e0*/ MOV R14, c[0x0][0x1a0] ;
/*01f0*/ IMAD.MOV.U32 R15, RZ, RZ, c[0x0][0x1a4] ;
/*0200*/ STS.64 [`($__psiV_temp_list_ptr3_shared4_$_0__2695)], R6 ;
/*0210*/ STS.64 [`($__psiV_list_devptr4_shared6_$_0__2699)], R8 ;
/*0220*/ STS.64 [`($__psiMinv_temp_list_devptr5_shared_$_0__2703)], R10 ;
/*0230*/ STS.64 [`($__psiMinv_cols6_shared_$_0__2705)], R12 ;
/*0240*/ STS [`($__WorkingIndex7_shared7_$_0__2707)], R0 ;
/*0250*/ STS.64 [`($__dpsiV_list_ptr8_shared_$_0__2709)], R14 ;
.L_x_602:
// Reconvergence point for the thread-0-only region opened by BSSY B0.
/*0260*/ BSYNC B0 ;
.L_x_601:
/*0270*/ BAR.SYNC 0x0 ;
// Thread 0 zeroes the 64-bit iw slot between two block barriers.
/*0280*/ @!P1 STS.64 [`($__iw_shared5_$_0__2697)], RZ ;
/*0290*/ BAR.SYNC 0x0 ;
/*02a0*/ @!P0 EXIT ;
/*02b0*/ IMAD.MOV.U32 R3, RZ, RZ, c[0x0][0xc] ;
/*02c0*/ S2UR UR37, SR_CTAID.X ;
/*02d0*/ ULDC UR36, c[0x0][0x160] ;
/*02e0*/ USHF.R.S32.HI UR36, URZ, 0x1f, UR36 ;
/*02f0*/ SHF.R.S32.HI R2, RZ, 0x1f, R3 ;
/*0300*/ LOP3.LUT R0, R2, UR36, RZ, 0xfc, !PT ;
/*0310*/ ISETP.NE.U32.AND P0, PT, R0, RZ, PT ;
/*0320*/ USHF.R.S32.HI UR38, URZ, 0x1f, UR37 ;
/*0330*/ @P0 BRA `(.L_x_603) ;
/*0340*/ I2F.U32.RP R0, c[0x0][0xc] ;
/*0350*/ ISETP.NE.U32.AND P2, PT, RZ, c[0x0][0xc], PT ;
/*0360*/ MUFU.RCP R0, R0 ;
/*0370*/ IADD3 R4, R0, 0xffffffe, RZ ;
/*0380*/ F2I.FTZ.U32.TRUNC.NTZ R5, R4 ;
/*0390*/ IMAD.MOV.U32 R4, RZ, RZ, RZ ;
/*03a0*/ IMAD.MOV R7, RZ, RZ, -R5 ;
/*03b0*/ IMAD R7, R7, c[0x0][0xc], RZ ;
/*03c0*/ IMAD.HI.U32 R5, R5, R7, R4 ;
/*03d0*/ IMAD.HI.U32 R4, R5, c[0x0][0x160], RZ ;
/*03e0*/ IMAD.MOV R6, RZ, RZ, -R4 ;
/*03f0*/ IMAD.MOV.U32 R5, RZ, RZ, RZ ;
/*0400*/ IMAD R3, R3, R6, c[0x0][0x160] ;
/*0410*/ ISETP.GE.U32.AND P0, PT, R3, c[0x0][0xc], PT ;
/*0420*/ @P0 IADD3 R3, R3, -c[0x0][0xc], RZ ;
/*0430*/ @P0 IADD3 R4, R4, 0x1, RZ ;
/*0440*/ ISETP.GE.U32.AND P1, PT, R3, c[0x0][0xc], PT ;
/*0450*/ @P1 IADD3 R4, R4, 0x1, RZ ;
/*0460*/ @!P2 LOP3.LUT R4, RZ, c[0x0][0xc], RZ, 0x33, !PT ;
/*0470*/ BRA `(.L_x_75) ;
.L_x_603:
/*0480*/ IMAD.MOV.U32 R4, RZ, RZ, c[0x0][0x160] ;
/*0490*/ MOV R6, c[0x0][0xc] ;
/*04a0*/ IMAD.U32 R5, RZ, RZ, UR36 ;
/*04b0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_75@srel)) ;
/*04c0*/ IMAD.MOV.U32 R7, RZ, RZ, R2 ;
/*04d0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_75@srel)) ;
/*04e0*/ CALL.ABS.NOINC `(__cuda_sm20_div_u64) ;
.L_x_75:
/*04f0*/ IADD3 R9, P0, RZ, -R4, RZ ;
/*0500*/ IMAD.U32 R7, RZ, RZ, UR36 ;
/*0510*/ MOV R6, c[0x0][0x160] ;
/*0520*/ ULDC UR4, c[0x0][0x160] ;
/*0530*/ IMAD.X R3, RZ, RZ, ~R5, P0 ;
/*0540*/ UIADD3 UR4, UP0, UR4, -0x1, URZ ;
/*0550*/ IMAD.WIDE.U32 R6, R9, c[0x0][0xc], R6 ;
/*0560*/ UIADD3.X UR5, UR36, -0x1, URZ, UP0, !UPT ;
/*0570*/ IMAD R3, R3, c[0x0][0xc], RZ ;
/*0580*/ ISETP.GT.U32.AND P0, PT, R6, UR37, PT ;
/*0590*/ IMAD R3, R2, R9, R3 ;
/*05a0*/ IMAD.IADD R7, R7, 0x1, R3 ;
/*05b0*/ ISETP.GT.U32.AND.EX P0, PT, R7, UR38, PT, P0 ;
/*05c0*/ @P0 IADD3 R0, P1, R4.reuse, 0x1, RZ ;
/*05d0*/ @!P0 IMAD R11, R5, UR37, RZ ;
/*05e0*/ @!P0 IMAD.WIDE.U32 R6, R4, UR37, R6 ;
/*05f0*/ @P0 IMAD.X R2, RZ, RZ, R5, P1 ;
/*0600*/ @!P0 IMAD R11, R4.reuse, UR38, R11 ;
/*0610*/ @!P0 IADD3 R4, P1, R4, -0x1, RZ ;
/*0620*/ @P0 IMAD R9, R2, UR37, RZ ;
/*0630*/ @P0 IMAD.WIDE.U32 R2, R0, UR37, RZ ;
/*0640*/ @!P0 IADD3.X R5, R5, -0x1, RZ, P1, !PT ;
/*0650*/ @P0 IMAD R9, R0, UR38, R9 ;
/*0660*/ @P0 MOV R32, R2 ;
/*0670*/ @!P0 IMAD.MOV.U32 R32, RZ, RZ, R6 ;
/*0680*/ @P0 IMAD.IADD R31, R3, 0x1, R9 ;
/*0690*/ @!P0 IADD3 R31, R7, R11, RZ ;
/*06a0*/ IADD3 R4, P2, R4, R32, RZ ;
/*06b0*/ ISETP.LT.U32.AND P0, PT, R4, UR4, PT ;
/*06c0*/ IMAD.X R5, R5, 0x1, R31, P2 ;
/*06d0*/ ISETP.LT.U32.AND.EX P0, PT, R5, UR5, PT, P0 ;
/*06e0*/ SEL R30, R4, UR4, P0 ;
/*06f0*/ SEL R29, R5, UR5, P0 ;
/*0700*/ IADD3 R30, P0, R30, 0x1, RZ ;
/*0710*/ IADD3.X R29, RZ, R29, RZ, P0, !PT ;
/*0720*/ ISETP.GE.U32.AND P0, PT, R32, R30, PT ;
/*0730*/ ISETP.GE.U32.AND.EX P0, PT, R31, R29, PT, P0 ;
/*0740*/ @P0 EXIT ;
/*0750*/ IMAD.MOV.U32 R28, RZ, RZ, c[0x0][0x0] ;
/*0760*/ SHF.R.S32.HI R24, RZ, 0x5, R33.reuse ;
/*0770*/ IMAD.SHL.U32 R25, R33.reuse, 0x4, RZ ;
/*0780*/ LOP3.LUT P1, R27, R33, 0x1f, RZ, 0xc0, !PT ;
/*0790*/ IADD3 R28, R28, -0x1, RZ ;
/*07a0*/ IMAD.SHL.U32 R24, R24, 0x4, RZ ;
/*07b0*/ P2R R35, PR, RZ, 0x2 ;
/*07c0*/ LOP3.LUT R28, R28, 0xffffffe0, RZ, 0xc0, !PT ;
/*07d0*/ SHF.R.U32.HI R26, RZ, 0x5, R33 ;
/*07e0*/ ISETP.NE.AND P0, PT, R33, R28, PT ;
/*07f0*/ P2R R0, PR, RZ, 0x1 ;
.L_x_765:
/*0800*/ ISETP.NE.AND P0, PT, R23, RZ, PT ;
/*0810*/ YIELD (*"RELOCATOR OPCODE,YIELD,280"*);
/*0820*/ WARPSYNC 0xffffffff ;
/*0830*/ ULDC UR4, c[0x0][0x0] ;
/*0840*/ UIADD3 UR4, UR4, -0x20, URZ ;
/*0850*/ IMAD.U32 R22, RZ, RZ, UR4 ;
/*0860*/ @!P0 IMAD.MOV.U32 R2, RZ, RZ, R32 ;
/*0870*/ @!P0 IMAD.MOV.U32 R3, RZ, RZ, R31 ;
/*0880*/ @!P0 STS.64 [`($__iw_shared5_$_0__2697)], R2 ;
/*0890*/ BAR.SYNC 0x0 ;
/*08a0*/ @!P0 STS.64 [`($__ratioGradRef_local_shared__2701)], RZ ;
/*08b0*/ BAR.SYNC 0x0 ;
/*08c0*/ @!P0 STS.64 [`(($__ratioGradRef_local_shared__2701 + 0x8))], RZ ;
/*08d0*/ BAR.SYNC 0x0 ;
/*08e0*/ @!P0 STS.64 [`(($__ratioGradRef_local_shared__2701 + 0x10))], RZ ;
/*08f0*/ BAR.SYNC 0x0 ;
/*0900*/ LDS R0, [`(IsSPMDMode)] ;
/*0910*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*0920*/ IMAD.MOV.U32 R0, RZ, RZ, R22 ;
/*0930*/ @P0 IMAD.MOV R0, RZ, RZ, c[0x0][0x0] ;
/*0940*/ ISETP.GE.U32.AND P1, PT, R0.reuse, 0x20, PT ;
/*0950*/ LOP3.LUT R2, R0, 0xffffffe0, RZ, 0xc0, !PT ;
/*0960*/ SEL R2, R2, 0x1, P1 ;
/*0970*/ SEL R3, R2, c[0x0][0x0], !P0 ;
/*0980*/ ISETP.NE.AND P1, PT, R3, R0, PT ;
/*0990*/ SEL R3, R3, RZ, P1 ;
/*09a0*/ @!P0 BRA `(.L_x_604) ;
/*09b0*/ BAR.SYNC 0x0 ;
/*09c0*/ ISETP.NE.AND P0, PT, R33, RZ, PT ;
/*09d0*/ BSSY B10, `(.L_x_605) ;
/*09e0*/ IADD3 R0, R3, -0x1, RZ ;
/*09f0*/ P2R R23, PR, RZ, 0x1 ;
/*0a00*/ @!P0 IMAD.MOV.U32 R2, RZ, RZ, 0x1 ;
/*0a10*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_2)], R3 ;
/*0a20*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_1)], R2 ;
/*0a30*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_0)], R2 ;
/*0a40*/ BAR.SYNC 0x0 ;
/*0a50*/ BAR.SYNC 0x0 ;
/*0a60*/ ISETP.GE.U32.AND P0, PT, R0, R33, PT ;
/*0a70*/ @!P0 BRA `(.L_x_606) ;
/*0a80*/ LDS.64 R4, [`($__NumPtcls1_shared2_$_0__2691)] ;
/*0a90*/ ISETP.NE.U32.AND P0, PT, R4, RZ, PT ;
/*0aa0*/ ISETP.NE.AND.EX P0, PT, R5, RZ, PT, P0 ;
/*0ab0*/ @!P0 BRA `(.L_x_606) ;
/*0ac0*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*0ad0*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_1)] ;
/*0ae0*/ ISETP.NE.AND P1, PT, R0.reuse, 0x1, PT ;
/*0af0*/ ISETP.NE.AND P3, PT, R0, RZ, PT ;
/*0b00*/ ISETP.NE.AND P0, PT, R3, R0, PT ;
/*0b10*/ IADD3 R0, P2, R4, -0x1, RZ ;
/*0b20*/ SEL R2, R33, RZ, !P0 ;
/*0b30*/ IADD3.X R4, R5, -0x1, RZ, P2, !PT ;
/*0b40*/ IMAD.MOV.U32 R5, RZ, RZ, 0x1 ;
/*0b50*/ P2R R34, PR, RZ, 0x8 ;
/*0b60*/ SEL R21, R2, RZ, P3 ;
/*0b70*/ @P1 BRA `(.L_x_607) ;
/*0b80*/ LDS R5, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*0b90*/ ISETP.NE.AND P0, PT, R5, RZ, PT ;
/*0ba0*/ @!P0 LDS R2, [`(IsSPMDMode)] ;
/*0bb0*/ ISETP.NE.AND P2, PT, R2, RZ, !P0 ;
/*0bc0*/ @!P0 IMAD.MOV.U32 R2, RZ, RZ, R22 ;
/*0bd0*/ @P2 IADD3 R2, RZ, c[0x0][0x0], RZ ;
/*0be0*/ @!P0 IMAD.MOV.U32 R5, RZ, RZ, R2 ;
.L_x_607:
/*0bf0*/ ISETP.GE.U32.AND P0, PT, R0, R21, PT ;
/*0c00*/ BSSY B0, `(.L_x_608) ;
/*0c10*/ SHF.R.S32.HI R37, RZ, 0x1f, R21 ;
/*0c20*/ CS2R R2, SRZ ;
/*0c30*/ CS2R R16, SRZ ;
/*0c40*/ CS2R R18, SRZ ;
/*0c50*/ ISETP.GE.U32.AND.EX P0, PT, R4, R37, PT, P0 ;
/*0c60*/ @!P0 BRA `(.L_x_609) ;
/*0c70*/ LDS.64 R6, [`($__iw_shared5_$_0__2697)] ;
/*0c80*/ ULDC.64 UR4, c[0x0][0x118] ;
/*0c90*/ LDS.64 R10, [`($__psiMinv_temp_list_devptr5_shared_$_0__2703)] ;
/*0ca0*/ LDS.64 R8, [`($__psiV_temp_list_ptr3_shared4_$_0__2695)] ;
/*0cb0*/ LDS.64 R12, [`($__dpsiV_list_ptr8_shared_$_0__2709)] ;
/*0cc0*/ LDS.64 R2, [`($__psiV_list_devptr4_shared6_$_0__2699)] ;
/*0cd0*/ LDS R16, [`($__WorkingIndex7_shared7_$_0__2707)] ;
/*0ce0*/ LDS.64 R80, [`($__psiMinv_cols6_shared_$_0__2705)] ;
/*0cf0*/ LDS.64 R82, [`($__confgListOccup_ptr2_shared3_$_0__2693)] ;
/*0d00*/ IMAD.SHL.U32 R15, R6.reuse, 0x8, RZ ;
/*0d10*/ SHF.L.U64.HI R7, R6, 0x3, R7 ;
/*0d20*/ IADD3 R74, P2, R10, R15.reuse, RZ ;
/*0d30*/ IADD3 R10, P0, R8, R15, RZ ;
/*0d40*/ IMAD.X R75, R11, 0x1, R7.reuse, P2 ;
/*0d50*/ IMAD.X R11, R9, 0x1, R7, P0 ;
/*0d60*/ LD.E.64 R74, [R74.64] ;
/*0d70*/ LD.E.64 R10, [R10.64] ;
/*0d80*/ IADD3 R14, P2, R12, R15.reuse, RZ ;
/*0d90*/ IADD3 R2, P0, R2, R15, RZ ;
/*0da0*/ IMAD.X R15, R13, 0x1, R7, P2 ;
/*0db0*/ IADD3.X R3, R3, R7, RZ, P0, !PT ;
/*0dc0*/ LD.E.64 R14, [R14.64] ;
/*0dd0*/ IMAD.MOV.U32 R8, RZ, RZ, R21 ;
/*0de0*/ SHF.R.S32.HI R17, RZ, 0x1f, R16.reuse ;
/*0df0*/ MOV R7, R37 ;
/*0e00*/ IMAD R6, R81, R8.reuse, RZ ;
/*0e10*/ LD.E.64 R12, [R2.64] ;
/*0e20*/ IMAD.WIDE.U32 R16, R80, R8, R16 ;
/*0e30*/ IMAD R9, R80, R7, R6 ;
/*0e40*/ IMAD.IADD R17, R17, 0x1, R9 ;
/*0e50*/ IMAD.SHL.U32 R3, R8, 0x8, RZ ;
/*0e60*/ SHF.L.U64.HI R79, R80.reuse, 0x3, R81 ;
/*0e70*/ SHF.L.U32 R80, R80, 0x3, RZ ;
/*0e80*/ SHF.R.S32.HI R84, RZ, 0x1f, R5 ;
/*0e90*/ IADD3 R78, P2, R82, R3, RZ ;
/*0ea0*/ IMAD.WIDE.U32 R20, R80, R5, RZ ;
/*0eb0*/ CS2R R18, SRZ ;
/*0ec0*/ SHF.L.U64.HI R86, R5, 0x3, R84 ;
/*0ed0*/ IMAD.MOV.U32 R88, RZ, RZ, R8 ;
/*0ee0*/ IMAD.MOV.U32 R87, RZ, RZ, R7 ;
/*0ef0*/ LEA R9, P0, R16, R74, 0x3 ;
/*0f00*/ LEA.HI.X R74, R16, R75, R17, 0x3, P0 ;
/*0f10*/ SHF.L.U64.HI R17, R8, 0x3, R7 ;
/*0f20*/ IADD3 R75, P0, R10, R3, RZ ;
/*0f30*/ IMAD.X R81, R11, 0x1, R17, P0 ;
/*0f40*/ IMAD R11, R79, R5, RZ ;
/*0f50*/ IADD3 R85, P0, R8, 0x1, RZ ;
/*0f60*/ IMAD R11, R84, R80, R11 ;
/*0f70*/ CS2R R2, SRZ ;
/*0f80*/ IMAD.X R82, R83, 0x1, R17, P2 ;
/*0f90*/ SHF.L.U32 R83, R5, 0x3, RZ ;
/*0fa0*/ CS2R R16, SRZ ;
/*0fb0*/ IADD3.X R89, RZ, R7, RZ, P0, !PT ;
/*0fc0*/ IMAD.IADD R91, R21, 0x1, R11 ;
.L_x_617:
/*0fd0*/ ISETP.GE.U32.AND P2, PT, R88, R85, PT ;
/*0fe0*/ BSSY B1, `(.L_x_610) ;
/*0ff0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x80, 0x0 ;
/*1000*/ IMAD.MOV.U32 R56, RZ, RZ, R78 ;
/*1010*/ ISETP.GE.U32.AND.EX P2, PT, R87, R89, PT, P2 ;
/*1020*/ IMAD.MOV.U32 R57, RZ, RZ, R82 ;
/*1030*/ MOV R62, R75 ;
/*1040*/ IMAD.MOV.U32 R63, RZ, RZ, R81 ;
/*1050*/ MOV R50, R9 ;
/*1060*/ IMAD.MOV.U32 R51, RZ, RZ, R74 ;
/*1070*/ MOV R90, R88 ;
/*1080*/ IMAD.MOV.U32 R6, RZ, RZ, R87 ;
/*1090*/ @!P2 BRA `(.L_x_611) ;
/*10a0*/ ULDC.64 UR4, c[0x0][0x118] ;
/*10b0*/ LD.E.64 R42, [R56.64] ;
/*10c0*/ LEA R44, P0, R42, R12, 0x3 ;
/*10d0*/ LEA.HI.X R45, R42, R13, R43, 0x3, P0 ;
/*10e0*/ LD.E.64 R44, [R44.64] ;
/*10f0*/ IMAD R11, R43, 0x18, RZ ;
/*1100*/ IMAD.WIDE.U32 R42, R42, 0x18, R14 ;
/*1110*/ IMAD.MOV.U32 R40, RZ, RZ, R50 ;
/*1120*/ IMAD.MOV.U32 R41, RZ, RZ, R51 ;
/*1130*/ IMAD.IADD R43, R43, 0x1, R11 ;
/*1140*/ IADD3 R90, P2, R90, 0x1, RZ ;
/*1150*/ ST.E.64 [R62.64], R44 ;
/*1160*/ LD.E.64 R40, [R40.64] ;
/*1170*/ LD.E.64 R38, [R42.64] ;
/*1180*/ LD.E.64 R36, [R42.64+0x8] ;
/*1190*/ LD.E.64 R10, [R42.64+0x10] ;
/*11a0*/ IADD3.X R6, RZ, R6, RZ, P2, !PT ;
/*11b0*/ IADD3 R62, P2, R62, 0x8, RZ ;
/*11c0*/ IADD3 R50, P0, R50, R80, RZ ;
/*11d0*/ IMAD.X R63, RZ, RZ, R63, P2 ;
/*11e0*/ IADD3 R56, P2, R56, 0x8, RZ ;
/*11f0*/ IMAD.X R51, R51, 0x1, R79, P0 ;
/*1200*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*1210*/ IADD3.X R57, RZ, R57, RZ, P2, !PT ;
/*1220*/ DFMA R18, R40, R38, R18 ;
/*1230*/ DFMA R16, R40, R36, R16 ;
/*1240*/ DFMA R2, R40, R10, R2 ;
.L_x_611:
/*1250*/ BSYNC B1 ;
.L_x_610:
/*1260*/ IADD3 R10, P2, R85, -R90.reuse, RZ ;
/*1270*/ BSSY B1, `(.L_x_612) ;
/*1280*/ ISETP.LE.U32.AND P3, PT, R10, 0x3, PT ;
/*1290*/ IMAD.X R11, R89, 0x1, ~R6, P2 ;
/*12a0*/ ISETP.GT.U32.AND P2, PT, R85, R90, PT ;
/*12b0*/ ISETP.GT.U32.AND.EX P2, PT, R89, R6, PT, P2 ;
/*12c0*/ ISETP.LE.U32.OR.EX P2, PT, R11, RZ, !P2, P3 ;
/*12d0*/ @P2 BRA `(.L_x_613) ;
/*12e0*/ IADD3 R93, P2, R85, -0x3, RZ ;
/*12f0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*1300*/ IADD3.X R92, R89, -0x1, RZ, P2, !PT ;
.L_x_614:
/*1310*/ IMAD.MOV.U32 R40, RZ, RZ, R56 ;
/*1320*/ ULDC.64 UR4, c[0x0][0x118] ;
/*1330*/ IMAD.MOV.U32 R41, RZ, RZ, R57 ;
/*1340*/ LD.E.64 R10, [R40.64] ;
/*1350*/ LEA R42, P2, R10, R12, 0x3 ;
/*1360*/ LEA.HI.X R43, R10, R13, R11, 0x3, P2 ;
/*1370*/ LD.E.64 R42, [R42.64] ;
/*1380*/ IMAD.MOV.U32 R38, RZ, RZ, R62 ;
/*1390*/ MOV R39, R63 ;
/*13a0*/ ST.E.64 [R38.64], R42 ;
/*13b0*/ LD.E.64 R36, [R40.64+0x8] ;
/*13c0*/ IMAD R11, R11, 0x18, RZ ;
/*13d0*/ IMAD.WIDE.U32 R48, R10, 0x18, R14 ;
/*13e0*/ LD.E.64 R62, [R50.64] ;
/*13f0*/ LEA R46, P2, R36, R12, 0x3 ;
/*1400*/ LEA.HI.X R47, R36, R13, R37, 0x3, P2 ;
/*1410*/ LD.E.64 R46, [R46.64] ;
/*1420*/ IMAD.IADD R49, R49, 0x1, R11 ;
/*1430*/ LD.E.64 R68, [R48.64] ;
/*1440*/ LD.E.64 R66, [R48.64+0x8] ;
/*1450*/ LD.E.64 R64, [R48.64+0x10] ;
/*1460*/ ST.E.64 [R38.64+0x8], R46 ;
/*1470*/ LD.E.64 R52, [R40.64+0x10] ;
/*1480*/ LEA R42, P2, R52, R12, 0x3 ;
/*1490*/ LEA.HI.X R43, R52, R13, R53, 0x3, P2 ;
/*14a0*/ LD.E.64 R42, [R42.64] ;
/*14b0*/ IADD3 R10, P2, R50, R80, RZ ;
/*14c0*/ IMAD R37, R37, 0x18, RZ ;
/*14d0*/ IMAD.WIDE.U32 R44, R36, 0x18, R14 ;
/*14e0*/ IMAD.X R11, R51, 0x1, R79, P2 ;
/*14f0*/ IMAD.IADD R45, R45, 0x1, R37 ;
/*1500*/ LD.E.64 R56, [R10.64] ;
/*1510*/ LD.E.64 R60, [R44.64] ;
/*1520*/ LD.E.64 R36, [R44.64+0x8] ;
/*1530*/ LD.E.64 R58, [R44.64+0x10] ;
/*1540*/ ST.E.64 [R38.64+0x10], R42 ;
/*1550*/ LD.E.64 R70, [R40.64+0x18] ;
/*1560*/ IMAD R47, R53, 0x18, RZ ;
/*1570*/ IMAD.WIDE.U32 R52, R52, 0x18, R14 ;
/*1580*/ IADD3 R53, R53, R47, RZ ;
/*1590*/ LD.E.64 R42, [R52.64] ;
/*15a0*/ LD.E.64 R54, [R52.64+0x8] ;
/*15b0*/ LD.E.64 R52, [R52.64+0x10] ;
/*15c0*/ LEA R76, P2, R70, R12, 0x3 ;
/*15d0*/ LEA.HI.X R77, R70, R13, R71, 0x3, P2 ;
/*15e0*/ LD.E.64 R76, [R76.64] ;
/*15f0*/ IADD3 R72, P2, R10, R80, RZ ;
/*1600*/ IMAD.X R73, R11, 0x1, R79, P2 ;
/*1610*/ LD.E.64 R50, [R72.64] ;
/*1620*/ IMAD R11, R71, 0x18, RZ ;
/*1630*/ IADD3 R10, P2, R72, R80, RZ ;
/*1640*/ IMAD.WIDE.U32 R70, R70, 0x18, R14 ;
/*1650*/ IMAD.IADD R71, R71, 0x1, R11 ;
/*1660*/ IADD3.X R11, R73, R79, RZ, P2, !PT ;
/*1670*/ DFMA R68, R62, R68, R18 ;
/*1680*/ DFMA R66, R62, R66, R16 ;
/*1690*/ DFMA R64, R62, R64, R2 ;
/*16a0*/ IADD3 R62, P2, R38, 0x20, RZ ;
/*16b0*/ DFMA R60, R56.reuse, R60, R68 ;
/*16c0*/ ST.E.64 [R38.64+0x18], R76 ;
/*16d0*/ LD.E.64 R46, [R10.64] ;
/*16e0*/ LD.E.64 R44, [R70.64] ;
/*16f0*/ LD.E.64 R48, [R70.64+0x8] ;
/*1700*/ LD.E.64 R72, [R70.64+0x10] ;
/*1710*/ DFMA R36, R56, R36, R66 ;
/*1720*/ IMAD.X R63, RZ, RZ, R39, P2 ;
/*1730*/ DFMA R58, R56, R58, R64 ;
/*1740*/ IADD3 R56, P2, R40, 0x20, RZ ;
/*1750*/ IMAD.X R57, RZ, RZ, R41, P2 ;
/*1760*/ IADD3 R90, P2, R90, 0x4, RZ ;
/*1770*/ DFMA R42, R50, R42, R60 ;
/*1780*/ IADD3.X R6, RZ, R6, RZ, P2, !PT ;
/*1790*/ DFMA R36, R50, R54, R36 ;
/*17a0*/ DFMA R52, R50, R52, R58 ;
/*17b0*/ IADD3 R50, P2, R10, R80, RZ ;
/*17c0*/ IMAD.X R51, R11, 0x1, R79, P2 ;
/*17d0*/ ISETP.GE.U32.AND P2, PT, R90, R93, PT ;
/*17e0*/ ISETP.GE.U32.AND.EX P2, PT, R6, R92, PT, P2 ;
/*17f0*/ DFMA R18, R46, R44, R42 ;
/*1800*/ DFMA R16, R46, R48, R36 ;
/*1810*/ DFMA R2, R46, R72, R52 ;
/*1820*/ @!P2 BRA `(.L_x_614) ;
.L_x_613:
/*1830*/ BSYNC B1 ;
.L_x_612:
/*1840*/ IADD3 R10, P2, R85, -R90.reuse, RZ ;
/*1850*/ BSSY B1, `(.L_x_615) ;
/*1860*/ ISETP.LE.U32.AND P3, PT, R10, 0x1, PT ;
/*1870*/ IMAD.X R11, R89, 0x1, ~R6, P2 ;
/*1880*/ ISETP.GT.U32.AND P2, PT, R85, R90, PT ;
/*1890*/ ISETP.GT.U32.AND.EX P2, PT, R89, R6, PT, P2 ;
/*18a0*/ ISETP.LE.U32.OR.EX P2, PT, R11, RZ, !P2, P3 ;
/*18b0*/ @P2 BRA `(.L_x_616) ;
/*18c0*/ ULDC.64 UR4, c[0x0][0x118] ;
/*18d0*/ LD.E.64 R58, [R56.64] ;
/*18e0*/ LEA R36, P0, R58, R12, 0x3 ;
/*18f0*/ LEA.HI.X R37, R58, R13, R59, 0x3, P0 ;
/*1900*/ LD.E.64 R36, [R36.64] ;
/*1910*/ ST.E.64 [R62.64], R36 ;
/*1920*/ LD.E.64 R44, [R56.64+0x8] ;
/*1930*/ IMAD R11, R59, 0x18, RZ ;
/*1940*/ IMAD.WIDE.U32 R58, R58, 0x18, R14 ;
/*1950*/ LD.E.64 R52, [R50.64] ;
/*1960*/ LEA R54, P0, R44, R12, 0x3 ;
/*1970*/ LEA.HI.X R55, R44, R13, R45, 0x3, P0 ;
/*1980*/ LD.E.64 R54, [R54.64] ;
/*1990*/ IADD3 R10, P0, R50, R80, RZ ;
/*19a0*/ IMAD.IADD R59, R59, 0x1, R11 ;
/*19b0*/ IMAD R39, R45, 0x18, RZ ;
/*19c0*/ IMAD.WIDE.U32 R44, R44, 0x18, R14 ;
/*19d0*/ LD.E.64 R48, [R58.64+0x8] ;
/*19e0*/ IMAD.X R11, R51, 0x1, R79, P0 ;
/*19f0*/ LD.E.64 R46, [R58.64+0x10] ;
/*1a00*/ IMAD.IADD R45, R45, 0x1, R39 ;
/*1a10*/ LD.E.64 R50, [R58.64] ;
/*1a20*/ ST.E.64 [R62.64+0x8], R54 ;
/*1a30*/ LD.E.64 R42, [R10.64] ;
/*1a40*/ LD.E.64 R40, [R44.64] ;
/*1a50*/ LD.E.64 R38, [R44.64+0x8] ;
/*1a60*/ LD.E.64 R36, [R44.64+0x10] ;
/*1a70*/ IADD3 R90, P2, R90, 0x2, RZ ;
/*1a80*/ DFMA R18, R52, R50, R18 ;
/*1a90*/ IADD3.X R6, RZ, R6, RZ, P2, !PT ;
/*1aa0*/ IADD3 R62, P2, R62, 0x10, RZ ;
/*1ab0*/ DFMA R16, R52, R48, R16 ;
/*1ac0*/ IADD3 R50, P0, R10, R80, RZ ;
/*1ad0*/ DFMA R2, R52, R46, R2 ;
/*1ae0*/ IMAD.X R63, RZ, RZ, R63, P2 ;
/*1af0*/ IADD3 R56, P2, R56, 0x10, RZ ;
/*1b00*/ IMAD.X R51, R11, 0x1, R79, P0 ;
/*1b10*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*1b20*/ IADD3.X R57, RZ, R57, RZ, P2, !PT ;
/*1b30*/ DFMA R18, R42, R40, R18 ;
/*1b40*/ DFMA R16, R42, R38, R16 ;
/*1b50*/ DFMA R2, R42, R36, R2 ;
.L_x_616:
/*1b60*/ BSYNC B1 ;
.L_x_615:
/*1b70*/ ISETP.LT.U32.AND P2, PT, R90, R85, PT ;
/*1b80*/ ULDC.64 UR4, c[0x0][0x118] ;
/*1b90*/ ISETP.LT.U32.OR.EX P0, PT, R6, R89, P0, P2 ;
/*1ba0*/ @P0 LD.E.64 R36, [R56.64] ;
/*1bb0*/ @P0 LEA R10, P2, R36, R12, 0x3 ;
/*1bc0*/ @P0 LEA.HI.X R11, R36, R13, R37, 0x3, P2 ;
/*1bd0*/ @P0 LD.E.64 R10, [R10.64] ;
/*1be0*/ @P0 IMAD R39, R37, 0x18, RZ ;
/*1bf0*/ @P0 IMAD.WIDE.U32 R36, R36, 0x18, R14 ;
/*1c00*/ @P0 IMAD.IADD R37, R37, 0x1, R39 ;
/*1c10*/ @P0 ST.E.64 [R62.64], R10 ;
/*1c20*/ @P0 LD.E.64 R38, [R50.64] ;
/*1c30*/ @P0 LD.E.64 R40, [R36.64] ;
/*1c40*/ @P0 LD.E.64 R42, [R36.64+0x8] ;
/*1c50*/ @P0 LD.E.64 R44, [R36.64+0x10] ;
/*1c60*/ @P0 DFMA R18, R38, R40, R18 ;
/*1c70*/ IADD3 R41, P2, R5, R8, RZ ;
/*1c80*/ @P0 DFMA R16, R38, R42, R16 ;
/*1c90*/ IMAD.X R7, R84, 0x1, R7, P2 ;
/*1ca0*/ @P0 DFMA R2, R38, R44, R2 ;
/*1cb0*/ ISETP.LT.U32.AND P0, PT, R41, R0, PT ;
/*1cc0*/ IADD3 R88, P2, R5, R88, RZ ;
/*1cd0*/ ISETP.LT.U32.AND.EX P0, PT, R7, R4, PT, P0 ;
/*1ce0*/ IMAD.X R87, R84, 0x1, R87, P2 ;
/*1cf0*/ IADD3 R9, P2, R9, R20, RZ ;
/*1d00*/ SEL R8, R41, R0, P0 ;
/*1d10*/ SEL R7, R7, R4, P0 ;
/*1d20*/ IMAD.X R74, R74, 0x1, R91, P2 ;
/*1d30*/ IADD3 R75, P0, R75, R83, RZ ;
/*1d40*/ IADD3 R85, P2, R8, 0x1, RZ ;
/*1d50*/ IMAD.X R81, R81, 0x1, R86, P0 ;
/*1d60*/ ISETP.GE.U32.AND P0, PT, R88, R85, PT ;
/*1d70*/ IMAD.X R89, RZ, RZ, R7, P2 ;
/*1d80*/ IADD3 R78, P2, R78, R83, RZ ;
/*1d90*/ ISETP.GE.U32.AND.EX P0, PT, R87, R89, PT, P0 ;
/*1da0*/ IMAD.X R82, R82, 0x1, R86, P2 ;
/*1db0*/ @!P0 BRA `(.L_x_617) ;
.L_x_609:
/*1dc0*/ BSYNC B0 ;
.L_x_608:
/*1dd0*/ STL.64 [R1], R18 ;
/*1de0*/ MOV R38, 0x1 ;
/*1df0*/ STL.64 [R1+0x8], R16 ;
/*1e00*/ STL.64 [R1+0x10], R2 ;
/*1e10*/ @P1 BRA `(.L_x_618) ;
/*1e20*/ LDS R38, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*1e30*/ ISETP.NE.AND P0, PT, R38, RZ, PT ;
/*1e40*/ @!P0 LDS R0, [`(IsSPMDMode)] ;
/*1e50*/ ISETP.NE.AND P1, PT, R0, RZ, !P0 ;
/*1e60*/ @!P0 IMAD.MOV.U32 R0, RZ, RZ, c[0x0][0x0] ;
/*1e70*/ @!P0 IADD3 R22, R0, -0x20, RZ ;
/*1e80*/ @!P0 IMAD.MOV.U32 R0, RZ, RZ, R22 ;
/*1e90*/ @P1 IADD3 R0, RZ, c[0x0][0x0], RZ ;
/*1ea0*/ @!P0 IMAD.MOV.U32 R38, RZ, RZ, R0 ;
.L_x_618:
/*1eb0*/ ISETP.NE.AND P0, PT, R38, 0x1, PT ;
/*1ec0*/ BSSY B9, `(.L_x_619) ;
/*1ed0*/ IMAD.MOV.U32 R48, RZ, RZ, R2 ;
/*1ee0*/ MOV R49, R3 ;
/*1ef0*/ @!P0 BRA `(.L_x_620) ;
/*1f00*/ IADD3 R45, R38.reuse, 0x1f, RZ ;
/*1f10*/ BSSY B7, `(.L_x_621) ;
/*1f20*/ LOP3.LUT P0, R47, R38, 0x1f, RZ, 0xc0, !PT ;
/*1f30*/ IMAD.MOV.U32 R36, RZ, RZ, R48 ;
/*1f40*/ SHF.R.U32.HI R46, RZ, 0x5, R45 ;
/*1f50*/ IMAD.MOV.U32 R37, RZ, RZ, R49 ;
/*1f60*/ IADD3 R3, R46, -0x1, RZ ;
/*1f70*/ ISETP.GE.U32.AND P1, PT, R26, R3, PT ;
/*1f80*/ @P0 BRA P1, `(.L_x_622) ;
/*1f90*/ BRA.DIV ~URZ, `(.L_x_623) ;
/* Shuffle-down reduction over three 64-bit values held in register pairs */
/* R18:R19, R16:R17 and R36:R37. Each 64-bit value is moved as two 32-bit */
/* SHFL.DOWN operations (high word, then low word) and combined with DADD. */
/* The shuffle distance halves every step: 0x10, 0x8, 0x4, 0x2, 0x1. */
/* Step 1: distance 0x10. */
/*1fa0*/ SHFL.DOWN PT, R3, R19, 0x10, 0x1f ;
/*1fb0*/ SHFL.DOWN PT, R2, R18, 0x10, 0x1f ;
/*1fc0*/ SHFL.DOWN PT, R5, R17, 0x10, 0x1f ;
/*1fd0*/ SHFL.DOWN PT, R4, R16, 0x10, 0x1f ;
/*1fe0*/ SHFL.DOWN PT, R7, R37, 0x10, 0x1f ;
/*1ff0*/ SHFL.DOWN PT, R6, R36, 0x10, 0x1f ;
/*2000*/ DADD R2, R18, R2 ;
/* Step 2: distance 0x8 (interleaved with the remaining step-1 adds). */
/*2010*/ SHFL.DOWN PT, R9, R3, 0x8, 0x1f ;
/*2020*/ DADD R4, R16, R4 ;
/*2030*/ SHFL.DOWN PT, R8, R2, 0x8, 0x1f ;
/*2040*/ SHFL.DOWN PT, R11, R5, 0x8, 0x1f ;
/* The third value is added from R48:R49 here rather than from R36:R37. */
/*2050*/ DADD R6, R6, R48 ;
/*2060*/ SHFL.DOWN PT, R10, R4, 0x8, 0x1f ;
/*2070*/ SHFL.DOWN PT, R13, R7, 0x8, 0x1f ;
/*2080*/ SHFL.DOWN PT, R12, R6, 0x8, 0x1f ;
/*2090*/ DADD R8, R2, R8 ;
/* Step 3: distance 0x4. */
/*20a0*/ SHFL.DOWN PT, R15, R9, 0x4, 0x1f ;
/*20b0*/ DADD R10, R4, R10 ;
/*20c0*/ SHFL.DOWN PT, R14, R8, 0x4, 0x1f ;
/*20d0*/ SHFL.DOWN PT, R17, R11, 0x4, 0x1f ;
/*20e0*/ DADD R12, R6, R12 ;
/*20f0*/ SHFL.DOWN PT, R16, R10, 0x4, 0x1f ;
/*2100*/ SHFL.DOWN PT, R3, R13, 0x4, 0x1f ;
/*2110*/ SHFL.DOWN PT, R2, R12, 0x4, 0x1f ;
/*2120*/ DADD R14, R8, R14 ;
/* Step 4: distance 0x2. */
/*2130*/ SHFL.DOWN PT, R7, R15, 0x2, 0x1f ;
/*2140*/ DADD R4, R10, R16 ;
/*2150*/ SHFL.DOWN PT, R6, R14, 0x2, 0x1f ;
/*2160*/ SHFL.DOWN PT, R17, R5, 0x2, 0x1f ;
/*2170*/ DADD R2, R12, R2 ;
/*2180*/ SHFL.DOWN PT, R16, R4, 0x2, 0x1f ;
/*2190*/ SHFL.DOWN PT, R37, R3, 0x2, 0x1f ;
/*21a0*/ SHFL.DOWN PT, R36, R2, 0x2, 0x1f ;
/*21b0*/ DADD R42, R14, R6 ;
/* Step 5: distance 0x1. The partial sums now live in R42:R43, R16:R17 */
/* and R36:R37, and the shuffled-in values land in R18:R19, R40:R41 and */
/* R4 plus R39. */
/*21c0*/ SHFL.DOWN PT, R19, R43, 0x1, 0x1f ;
/*21d0*/ DADD R16, R4, R16 ;
/*21e0*/ SHFL.DOWN PT, R18, R42, 0x1, 0x1f ;
/*21f0*/ SHFL.DOWN PT, R41, R17, 0x1, 0x1f ;
/*2200*/ DADD R36, R2, R36 ;
/*2210*/ SHFL.DOWN PT, R40, R16, 0x1, 0x1f ;
/*2220*/ SHFL.DOWN PT, R39, R37, 0x1, 0x1f ;
/*2230*/ SHFL.DOWN PT, R4, R36, 0x1, 0x1f ;
/* Labelled join point. No branch to this label appears in this part of */
/* the listing. NOTE(review): presumably the out-of-line BRA.DIV path */
/* re-enters here - confirm against the code at .L_x_623. */
.L_x_766:
/* R39 carries the high word of the third shuffled value. Move it next to */
/* R4 so that R4:R5 forms the 64-bit operand for the last DADD. */
/*2240*/ MOV R5, R39 ;
/*2250*/ DADD R18, R18, R42 ;
/*2260*/ DADD R16, R40, R16 ;
/* Write the three final sums to the local-memory slots at R1, R1+0x8 */
/* and R1+0x10, then skip the alternative reduction path. */
/*2270*/ STL.64 [R1], R18 ;
/*2280*/ DADD R36, R4, R36 ;
/*2290*/ STL.64 [R1+0x8], R16 ;
/*22a0*/ STL.64 [R1+0x10], R36 ;
/*22b0*/ BRA `(.L_x_624) ;
.L_x_622:
/*22c0*/ ISETP.GE.U32.AND P0, PT, R47, 0x2, PT ;
/*22d0*/ @!P0 BRA `(.L_x_624) ;
/*22e0*/ BSSY B6, `(.L_x_625) ;
/*22f0*/ SHF.R.U32.HI R39, RZ, 0x1, R47 ;
.L_x_627:
/*2300*/ BRA.DIV ~URZ, `(.L_x_626) ;
/*2310*/ SHFL.DOWN PT, R41, R19, R39, 0x1f ;
/*2320*/ SHFL.DOWN PT, R40, R18, R39, 0x1f ;
/*2330*/ SHFL.DOWN PT, R43, R17, R39, 0x1f ;
/*2340*/ SHFL.DOWN PT, R42, R16, R39, 0x1f ;
/*2350*/ SHFL.DOWN PT, R44, R37, R39, 0x1f ;
/*2360*/ SHFL.DOWN PT, R2, R36, R39, 0x1f ;
.L_x_767:
/*2370*/ LOP3.LUT R0, R39, 0xffff, RZ, 0xc0, !PT ;
/*2380*/ IMAD.MOV.U32 R3, RZ, RZ, R44 ;
/*2390*/ IADD3 R39, R47, 0x1, RZ ;
/*23a0*/ ISETP.GE.U32.AND P0, PT, R27, R0, PT ;
/*23b0*/ SHF.R.U32.HI R0, RZ, 0x1, R39.reuse ;
/*23c0*/ SHF.R.U32.HI R39, RZ, 0x2, R39 ;
/*23d0*/ @!P0 DADD R40, R40, R18 ;
/*23e0*/ @!P0 DADD R42, R42, R16 ;
/*23f0*/ @!P0 DADD R2, R2, R36 ;
/*2400*/ ISETP.GT.U32.AND P0, PT, R47, 0x2, PT ;
/*2410*/ IMAD.MOV.U32 R47, RZ, RZ, R0 ;
/*2420*/ MOV R18, R40 ;
/*2430*/ IMAD.MOV.U32 R19, RZ, RZ, R41 ;
/*2440*/ IMAD.MOV.U32 R16, RZ, RZ, R42 ;
/*2450*/ IMAD.MOV.U32 R17, RZ, RZ, R43 ;
/*2460*/ IMAD.MOV.U32 R36, RZ, RZ, R2 ;
/*2470*/ MOV R37, R3 ;
/*2480*/ @P0 BRA `(.L_x_627) ;
/*2490*/ BSYNC B6 ;
.L_x_625:
/*24a0*/ STL.64 [R1+0x10], R2 ;
/*24b0*/ IMAD.MOV.U32 R16, RZ, RZ, R42 ;
/*24c0*/ MOV R18, R40 ;
/*24d0*/ IMAD.MOV.U32 R17, RZ, RZ, R43 ;
/*24e0*/ STL.64 [R1], R40 ;
/*24f0*/ MOV R19, R41 ;
/*2500*/ STL.64 [R1+0x8], R42 ;
.L_x_624:
/*2510*/ BSYNC B7 ;
.L_x_621:
/*2520*/ ISETP.GE.U32.AND P1, PT, R38, 0x21, PT ;
/*2530*/ BSSY B8, `(.L_x_628) ;
/*2540*/ ISETP.NE.AND P2, PT, R34, RZ, PT ;
/*2550*/ ISETP.NE.AND P0, PT, R33.reuse, R28, PT ;
/*2560*/ SEL R0, R33, RZ, P2 ;
/*2570*/ SEL R34, R0, R33, !P0 ;
/*2580*/ @!P1 BRA `(.L_x_629) ;
/*2590*/ BSSY B7, `(.L_x_630) ;
/*25a0*/ MOV R2, RZ ;
/* Loop that copies the 0x18-byte local-memory area at R1 through the */
/* shared buffer __openmp_nvptx_data_transfer_temporary_storage, one */
/* 32-bit word per iteration. R2 is the byte offset: it is cleared just */
/* before this label, advanced by 4 at the bottom, and the loop ends when */
/* it reaches 0x18. Each iteration is: barrier, publish a word to shared */
/* memory, barrier, read a word back from shared memory. */
.L_x_643:
/* First barrier. The flavour depends on IsSPMDMode: non-zero takes the */
/* WARPSYNC plus BAR.SYNC 0x0 path at .L_x_632. */
/*25b0*/ LDS R0, [`(IsSPMDMode)] ;
/*25c0*/ YIELD (*"RELOCATOR OPCODE,YIELD,280"*);
/*25d0*/ BSSY B6, `(.L_x_631) ;
/*25e0*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*25f0*/ @P0 BRA `(.L_x_632) ;
/* IsSPMDMode is zero: inspect the first TeamState word. When it is zero */
/* and R33 equals R28 only the memory fence sequence below is executed. */
/*2600*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*2610*/ ISETP.NE.AND P1, PT, R33, R28, PT ;
/*2620*/ ISETP.NE.OR P0, PT, R0, RZ, P1 ;
/*2630*/ @P0 BRA `(.L_x_633) ;
/*2640*/ @!PT LDS RZ, [RZ] ;
/*2650*/ @!PT LDS RZ, [RZ] ;
/*2660*/ @!PT LDS RZ, [RZ] ;
/*2670*/ @!PT LDS RZ, [RZ] ;
/*2680*/ MEMBAR.SC.GPU ;
/*2690*/ ERRBAR;
/*26a0*/ CCTL.IVALL ;
/*26b0*/ BRA `(.L_x_634) ;
.L_x_633:
/* Otherwise synchronise on barrier 0x7. R5, the second BAR.SYNC operand, */
/* is chosen by the two SEL instructions from R22, the third TeamState */
/* word and the constant 1. */
/*26c0*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*26d0*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*26e0*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*26f0*/ SEL R5, R22, R3, !P1 ;
/*2700*/ SEL R5, R5, 0x1, !P0 ;
/*2710*/ BRA.DIV ~URZ, `(.L_x_635) ;
/*2720*/ BAR.SYNC 0x7, R5 ;
/*2730*/ BRA `(.L_x_634) ;
.L_x_632:
/*2740*/ WARPSYNC 0xffffffff ;
/*2750*/ BAR.SYNC 0x0 ;
.L_x_634:
/*2760*/ BSYNC B6 ;
.L_x_631:
/* Publish step: threads whose R35 is zero load the word at local address */
/* R1+R2 and store it to the shared buffer at offset R24. */
/* NOTE(review): R35 and R24 are set outside this chunk - presumably a */
/* lane index and a per-warp slot offset, confirm. */
/*2770*/ ISETP.NE.AND P0, PT, R35, RZ, PT ;
/*2780*/ BSSY B0, `(.L_x_636) ;
/*2790*/ IADD3 R37, R1, R2, RZ ;
/*27a0*/ @P0 BRA `(.L_x_637) ;
/*27b0*/ LDL R3, [R37] ;
/*27c0*/ STS [R24+`(__openmp_nvptx_data_transfer_temporary_storage)], R3 ;
.L_x_637:
/*27d0*/ BSYNC B0 ;
.L_x_636:
/* Second barrier, same selection logic as the first one. */
/*27e0*/ LDS R0, [`(IsSPMDMode)] ;
/*27f0*/ BSSY B6, `(.L_x_638) ;
/*2800*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*2810*/ @P0 BRA `(.L_x_639) ;
/*2820*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*2830*/ ISETP.NE.AND P1, PT, R33, R28, PT ;
/*2840*/ ISETP.NE.OR P0, PT, R0, RZ, P1 ;
/*2850*/ @P0 BRA `(.L_x_640) ;
/*2860*/ @!PT LDS RZ, [RZ] ;
/*2870*/ @!PT LDS RZ, [RZ] ;
/*2880*/ @!PT LDS RZ, [RZ] ;
/*2890*/ @!PT LDS RZ, [RZ] ;
/*28a0*/ MEMBAR.SC.GPU ;
/*28b0*/ ERRBAR;
/*28c0*/ CCTL.IVALL ;
/*28d0*/ BRA `(.L_x_641) ;
.L_x_640:
/*28e0*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*28f0*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*2900*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*2910*/ SEL R5, R22, R3, !P1 ;
/*2920*/ SEL R5, R5, 0x1, !P0 ;
/*2930*/ BRA.DIV ~URZ, `(.L_x_642) ;
/*2940*/ BAR.SYNC 0x7, R5 ;
/*2950*/ BRA `(.L_x_641) ;
.L_x_639:
/*2960*/ WARPSYNC 0xffffffff ;
/*2970*/ BAR.SYNC 0x0 ;
.L_x_641:
/*2980*/ BSYNC B6 ;
.L_x_638:
/* Read-back step: threads with R33 below R46 (unsigned compare) load a */
/* word from the shared buffer at offset R25 and overwrite their own */
/* local word at R1+R2. R46 was computed earlier as (R38 + 0x1f) >> 5. */
/*2990*/ ISETP.GE.U32.AND P0, PT, R33, R46, PT ;
/*29a0*/ IADD3 R2, R2, 0x4, RZ ;
/*29b0*/ @!P0 LDS R0, [R25+`(__openmp_nvptx_data_transfer_temporary_storage)] ;
/*29c0*/ @!P0 STL [R37], R0 ;
/* Repeat until all 0x18 bytes have been transferred. */
/*29d0*/ ISETP.NE.AND P0, PT, R2, 0x18, PT ;
/*29e0*/ @P0 BRA `(.L_x_643) ;
/*29f0*/ BSYNC B7 ;
.L_x_630:
/*2a00*/ ISETP.GE.U32.AND P0, PT, R45, 0x40, PT ;
/*2a10*/ ISETP.GT.U32.OR P0, PT, R33, 0x1f, !P0 ;
/*2a20*/ @P0 BRA `(.L_x_629) ;
/*2a30*/ LDL.64 R16, [R1+0x10] ;
/*2a40*/ LDL.64 R18, [R1] ;
/*2a50*/ LDL.64 R36, [R1+0x8] ;
/*2a60*/ BSSY B6, `(.L_x_644) ;
/*2a70*/ SHF.R.U32.HI R2, RZ, 0x6, R45 ;
.L_x_646:
/*2a80*/ PRMT R43, R2, 0x9910, RZ ;
/*2a90*/ BRA.DIV ~URZ, `(.L_x_645) ;
/*2aa0*/ SHFL.DOWN PT, R41, R19, R43, 0x1f ;
/*2ab0*/ SHFL.DOWN PT, R40, R18, R43, 0x1f ;
/*2ac0*/ SHFL.DOWN PT, R39, R37, R43, 0x1f ;
/*2ad0*/ SHFL.DOWN PT, R38, R36, R43, 0x1f ;
/*2ae0*/ SHFL.DOWN PT, R22, R17, R43, 0x1f ;
/*2af0*/ SHFL.DOWN PT, R4, R16, R43, 0x1f ;
.L_x_768:
/*2b00*/ LOP3.LUT R2, R2, 0xffff, RZ, 0xc0, !PT ;
/*2b10*/ IMAD.MOV.U32 R5, RZ, RZ, R22 ;
/*2b20*/ LOP3.LUT R3, R34, 0xffff, RZ, 0xc0, !PT ;
/*2b30*/ ISETP.GE.U32.AND P0, PT, R3, R2, PT ;
/*2b40*/ IADD3 R2, R46, 0x1, RZ ;
/*2b50*/ SHF.R.U32.HI R0, RZ, 0x1, R2.reuse ;
/*2b60*/ SHF.R.U32.HI R2, RZ, 0x2, R2 ;
/*2b70*/ @!P0 DADD R40, R40, R18 ;
/*2b80*/ @!P0 DADD R38, R38, R36 ;
/*2b90*/ @!P0 DADD R4, R4, R16 ;
/*2ba0*/ ISETP.GT.U32.AND P0, PT, R46, 0x2, PT ;
/*2bb0*/ IMAD.MOV.U32 R46, RZ, RZ, R0 ;
/*2bc0*/ MOV R18, R40 ;
/*2bd0*/ IMAD.MOV.U32 R19, RZ, RZ, R41 ;
/*2be0*/ IMAD.MOV.U32 R36, RZ, RZ, R38 ;
/*2bf0*/ IMAD.MOV.U32 R37, RZ, RZ, R39 ;
/*2c00*/ IMAD.MOV.U32 R16, RZ, RZ, R4 ;
/*2c10*/ MOV R17, R5 ;
/*2c20*/ @P0 BRA `(.L_x_646) ;
/*2c30*/ BSYNC B6 ;
.L_x_644:
/*2c40*/ STL.64 [R1+0x10], R4 ;
/*2c50*/ MOV R16, R36 ;
/*2c60*/ MOV R17, R37 ;
.L_x_629:
/*2c70*/ BSYNC B8 ;
.L_x_628:
/*2c80*/ ISETP.NE.AND P0, PT, R34, RZ, PT ;
/*2c90*/ @P0 BREAK B9 ;
/*2ca0*/ @P0 BRA `(.L_x_606) ;
/*2cb0*/ LDL.64 R48, [R1+0x10] ;
.L_x_620:
/*2cc0*/ BSYNC B9 ;
/* Read-modify-write of the shared-memory variable */
/* $__ratioGradRef_local_shared__2701, the variable the gist description */
/* asks the reader to look for. It is accessed as three consecutive */
/* 64-bit values at offsets 0x0, 0x8 and 0x10. */
.L_x_619:
/* Load the three current values from shared memory. */
/*2cd0*/ LDS.64 R2, [`($__ratioGradRef_local_shared__2701)] ;
/*2ce0*/ LDS.64 R6, [`(($__ratioGradRef_local_shared__2701 + 0x8))] ;
/*2cf0*/ LDS.64 R4, [`(($__ratioGradRef_local_shared__2701 + 0x10))] ;
/* Add the values held in R18:R19, R16:R17 and R48:R49 to them and store */
/* the sums back, so the shared values are accumulated into rather than */
/* overwritten. */
/*2d00*/ DADD R2, R2, R18 ;
/*2d10*/ DADD R16, R16, R6 ;
/*2d20*/ STS.64 [`($__ratioGradRef_local_shared__2701)], R2 ;
/*2d30*/ DADD R4, R4, R48 ;
/*2d40*/ STS.64 [`(($__ratioGradRef_local_shared__2701 + 0x8))], R16 ;
/*2d50*/ STS.64 [`(($__ratioGradRef_local_shared__2701 + 0x10))], R4 ;
.L_x_606:
/*2d60*/ BSYNC B10 ;
/* Barrier, then a reset of the three TeamState words in shared memory by */
/* the threads whose R23 is zero, then two further barriers before */
/* branching to .L_x_647. */
/* NOTE(review): R23 is set outside this chunk - presumably a thread */
/* index so that a single thread performs the reset, confirm. */
.L_x_605:
/*2d70*/ ISETP.NE.AND P0, PT, R23, RZ, PT ;
/*2d80*/ WARPSYNC 0xffffffff ;
/*2d90*/ BAR.SYNC 0x0 ;
/* Predicated on R23 == 0: write 0, 0 and 1 to TeamState words 0, 1, 2. */
/*2da0*/ @!P0 MOV R0, 0x1 ;
/*2db0*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ;
/*2dc0*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_1)], RZ ;
/*2dd0*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_2)], R0 ;
/* Two back-to-back barriers on barrier 0x0 follow the reset. */
/*2de0*/ BAR.SYNC 0x0 ;
/*2df0*/ BAR.SYNC 0x0 ;
/*2e00*/ BRA `(.L_x_647) ;
.L_x_604:
/*2e10*/ ISETP.GT.U32.AND P0, PT, R2, 0x1, PT ;
/*2e20*/ @P0 BRA `(.L_x_648) ;
/*2e30*/ IMAD.MOV.U32 R0, RZ, RZ, 0x1 ;
/*2e40*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], R0 ;
/*2e50*/ LDS.64 R4, [`($__NumPtcls1_shared2_$_0__2691)] ;
/*2e60*/ ISETP.NE.U32.AND P0, PT, R4, RZ, PT ;
/*2e70*/ ISETP.NE.AND.EX P0, PT, R5, RZ, PT, P0 ;
/*2e80*/ @!P0 BRA `(.L_x_649) ;
/*2e90*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_1)] ;
/*2ea0*/ BSSY B0, `(.L_x_650) ;
/*2eb0*/ CS2R R2, SRZ ;
/*2ec0*/ CS2R R18, SRZ ;
/*2ed0*/ LDS R7, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*2ee0*/ CS2R R16, SRZ ;
/*2ef0*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*2f00*/ IADD3 R0, P1, R4, -0x1, RZ ;
/*2f10*/ SEL R37, R33, RZ, !P0 ;
/*2f20*/ IADD3.X R4, R5, -0x1, RZ, P1, !PT ;
/*2f30*/ ISETP.GE.U32.AND P0, PT, R0, R37, PT ;
/*2f40*/ SHF.R.S32.HI R5, RZ, 0x1f, R37 ;
/*2f50*/ ISETP.NE.AND P1, PT, R7, RZ, PT ;
/*2f60*/ ISETP.GE.U32.AND.EX P0, PT, R4, R5, PT, P0 ;
/*2f70*/ SEL R34, R22, R7, !P1 ;
/*2f80*/ @!P0 BRA `(.L_x_651) ;
/*2f90*/ LDS.64 R6, [`($__iw_shared5_$_0__2697)] ;
/*2fa0*/ ULDC.64 UR4, c[0x0][0x118] ;
/*2fb0*/ LDS.64 R10, [`($__psiMinv_temp_list_devptr5_shared_$_0__2703)] ;
/*2fc0*/ LDS.64 R8, [`($__psiV_temp_list_ptr3_shared4_$_0__2695)] ;
/*2fd0*/ LDS.64 R12, [`($__dpsiV_list_ptr8_shared_$_0__2709)] ;
/*2fe0*/ LDS.64 R2, [`($__psiV_list_devptr4_shared6_$_0__2699)] ;
/*2ff0*/ LDS R76, [`($__WorkingIndex7_shared7_$_0__2707)] ;
/*3000*/ IMAD.SHL.U32 R21, R6.reuse, 0x8, RZ ;
/*3010*/ SHF.L.U64.HI R7, R6, 0x3, R7 ;
/*3020*/ IADD3 R10, P1, R10, R21.reuse, RZ ;
/*3030*/ IADD3 R78, P0, R8, R21, RZ ;
/*3040*/ IMAD.X R11, R11, 0x1, R7.reuse, P1 ;
/*3050*/ IMAD.X R79, R9, 0x1, R7, P0 ;
/*3060*/ IADD3 R20, P1, R12, R21, RZ ;
/*3070*/ LD.E.64 R10, [R10.64] ;
/*3080*/ IADD3 R14, P0, R2, R21, RZ ;
/*3090*/ LD.E.64 R78, [R78.64] ;
/*30a0*/ IADD3.X R15, R3, R7, RZ, P0, !PT ;
/*30b0*/ IMAD.X R21, R13, 0x1, R7, P1 ;
/*30c0*/ LDS.64 R12, [`($__psiMinv_cols6_shared_$_0__2705)] ;
/*30d0*/ LD.E.64 R14, [R14.64] ;
/*30e0*/ LD.E.64 R20, [R20.64] ;
/*30f0*/ IMAD.MOV.U32 R7, RZ, RZ, R37 ;
/*3100*/ SHF.R.S32.HI R77, RZ, 0x1f, R76 ;
/*3110*/ CS2R R18, SRZ ;
/*3120*/ LDS.64 R2, [`($__confgListOccup_ptr2_shared3_$_0__2693)] ;
/*3130*/ MOV R6, R5 ;
/*3140*/ IMAD.SHL.U32 R75, R7, 0x8, RZ ;
/*3150*/ SHF.R.S32.HI R81, RZ, 0x1f, R34 ;
/*3160*/ IMAD.MOV.U32 R85, RZ, RZ, R7 ;
/*3170*/ SHF.L.U32 R80, R34.reuse, 0x3, RZ ;
/*3180*/ IMAD.MOV.U32 R84, RZ, RZ, R6 ;
/*3190*/ SHF.L.U64.HI R83, R34, 0x3, R81 ;
/*31a0*/ IMAD R5, R13, R7.reuse, RZ ;
/*31b0*/ SHF.L.U64.HI R9, R12.reuse, 0x3, R13 ;
/*31c0*/ IMAD.WIDE.U32 R76, R12, R7, R76 ;
/*31d0*/ IMAD R17, R12, R6, R5 ;
/*31e0*/ SHF.L.U64.HI R5, R7, 0x3, R6 ;
/*31f0*/ IMAD.IADD R13, R77, 0x1, R17 ;
/*3200*/ SHF.L.U32 R77, R12, 0x3, RZ ;
/*3210*/ CS2R R16, SRZ ;
/*3220*/ IMAD.WIDE.U32 R36, R77, R34, RZ ;
/*3230*/ LEA R8, P0, R76, R10, 0x3 ;
/*3240*/ IMAD R10, R9, R34, RZ ;
/*3250*/ IADD3 R74, P1, R78, R75, RZ ;
/*3260*/ IMAD R87, R81, R77, R10 ;
/*3270*/ IADD3 R75, P2, R2, R75, RZ ;
/*3280*/ LEA.HI.X R76, R76, R11, R13, 0x3, P0 ;
/*3290*/ IMAD.X R78, R79, 0x1, R5.reuse, P1 ;
/*32a0*/ IADD3 R82, P0, R7, 0x1, RZ ;
/*32b0*/ IMAD.X R79, R3, 0x1, R5, P2 ;
/*32c0*/ CS2R R2, SRZ ;
/*32d0*/ IADD3.X R86, RZ, R6, RZ, P0, !PT ;
/*32e0*/ IMAD.IADD R87, R37, 0x1, R87 ;
.L_x_659:
/*32f0*/ ISETP.GE.U32.AND P1, PT, R85, R82, PT ;
/*3300*/ BSSY B1, `(.L_x_652) ;
/*3310*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x80, 0x0 ;
/*3320*/ IMAD.MOV.U32 R60, RZ, RZ, R75 ;
/*3330*/ ISETP.GE.U32.AND.EX P1, PT, R84, R86, PT, P1 ;
/*3340*/ IMAD.MOV.U32 R61, RZ, RZ, R79 ;
/*3350*/ MOV R54, R74 ;
/*3360*/ IMAD.MOV.U32 R55, RZ, RZ, R78 ;
/*3370*/ MOV R44, R8 ;
/*3380*/ IMAD.MOV.U32 R45, RZ, RZ, R76 ;
/*3390*/ MOV R89, R85 ;
/*33a0*/ IMAD.MOV.U32 R5, RZ, RZ, R84 ;
/*33b0*/ @!P1 BRA `(.L_x_653) ;
/*33c0*/ ULDC.64 UR4, c[0x0][0x118] ;
/*33d0*/ LD.E.64 R42, [R60.64] ;
/*33e0*/ LEA R46, P0, R42, R14, 0x3 ;
/*33f0*/ LEA.HI.X R47, R42, R15, R43, 0x3, P0 ;
/*3400*/ LD.E.64 R46, [R46.64] ;
/*3410*/ IMAD R11, R43, 0x18, RZ ;
/*3420*/ IMAD.WIDE.U32 R42, R42, 0x18, R20 ;
/*3430*/ IMAD.IADD R43, R43, 0x1, R11 ;
/*3440*/ IADD3 R89, P1, R89, 0x1, RZ ;
/*3450*/ ST.E.64 [R54.64], R46 ;
/*3460*/ LD.E.64 R40, [R44.64] ;
/*3470*/ LD.E.64 R38, [R42.64] ;
/*3480*/ LD.E.64 R12, [R42.64+0x8] ;
/*3490*/ LD.E.64 R10, [R42.64+0x10] ;
/*34a0*/ IMAD.X R5, RZ, RZ, R5, P1 ;
/*34b0*/ IADD3 R44, P1, R44, R77, RZ ;
/*34c0*/ IADD3 R54, P2, R54, 0x8, RZ ;
/*34d0*/ IADD3 R60, P3, R60, 0x8, RZ ;
/*34e0*/ IMAD.X R45, R45, 0x1, R9, P1 ;
/*34f0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*3500*/ IMAD.X R55, RZ, RZ, R55, P2 ;
/*3510*/ IMAD.X R61, RZ, RZ, R61, P3 ;
/*3520*/ DFMA R16, R40, R38, R16 ;
/*3530*/ DFMA R18, R40, R12, R18 ;
/*3540*/ DFMA R2, R40, R10, R2 ;
.L_x_653:
/*3550*/ BSYNC B1 ;
.L_x_652:
/*3560*/ IADD3 R10, P3, R82.reuse, -R89.reuse, RZ ;
/*3570*/ BSSY B1, `(.L_x_654) ;
/*3580*/ ISETP.GT.U32.AND P2, PT, R82, R89, PT ;
/*3590*/ ISETP.LE.U32.AND P1, PT, R10, 0x3, PT ;
/*35a0*/ IMAD.X R10, R86.reuse, 0x1, ~R5, P3 ;
/*35b0*/ ISETP.GT.U32.AND.EX P2, PT, R86, R5, PT, P2 ;
/*35c0*/ ISETP.LE.U32.OR.EX P1, PT, R10, RZ, !P2, P1 ;
/*35d0*/ @P1 BRA `(.L_x_655) ;
/*35e0*/ IADD3 R90, P1, R82, -0x3, RZ ;
/*35f0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*3600*/ IADD3.X R88, R86, -0x1, RZ, P1, !PT ;
.L_x_656:
/*3610*/ IMAD.MOV.U32 R10, RZ, RZ, R60 ;
/*3620*/ ULDC.64 UR4, c[0x0][0x118] ;
/*3630*/ IMAD.MOV.U32 R11, RZ, RZ, R61 ;
/*3640*/ LD.E.64 R12, [R10.64] ;
/*3650*/ LEA R38, P1, R12, R14, 0x3 ;
/*3660*/ LEA.HI.X R39, R12, R15, R13, 0x3, P1 ;
/*3670*/ LD.E.64 R38, [R38.64] ;
/*3680*/ IMAD.MOV.U32 R42, RZ, RZ, R54 ;
/*3690*/ MOV R43, R55 ;
/*36a0*/ ST.E.64 [R42.64], R38 ;
/*36b0*/ LD.E.64 R40, [R10.64+0x8] ;
/*36c0*/ IMAD R13, R13, 0x18, RZ ;
/*36d0*/ IMAD.WIDE.U32 R50, R12, 0x18, R20 ;
/*36e0*/ LD.E.64 R62, [R44.64] ;
/*36f0*/ LEA R46, P1, R40, R14, 0x3 ;
/*3700*/ LEA.HI.X R47, R40, R15, R41, 0x3, P1 ;
/*3710*/ LD.E.64 R46, [R46.64] ;
/*3720*/ IMAD.IADD R51, R51, 0x1, R13 ;
/*3730*/ LD.E.64 R68, [R50.64] ;
/*3740*/ LD.E.64 R66, [R50.64+0x8] ;
/*3750*/ LD.E.64 R64, [R50.64+0x10] ;
/*3760*/ ST.E.64 [R42.64+0x8], R46 ;
/*3770*/ LD.E.64 R48, [R10.64+0x10] ;
/*3780*/ IMAD R53, R41, 0x18, RZ ;
/*3790*/ LEA R38, P1, R48, R14, 0x3 ;
/*37a0*/ LEA.HI.X R39, R48, R15, R49, 0x3, P1 ;
/*37b0*/ LD.E.64 R38, [R38.64] ;
/*37c0*/ IADD3 R12, P1, R44, R77, RZ ;
/*37d0*/ IMAD.WIDE.U32 R40, R40, 0x18, R20 ;
/*37e0*/ IMAD.X R13, R45, 0x1, R9, P1 ;
/*37f0*/ IMAD.IADD R41, R41, 0x1, R53 ;
/*3800*/ LD.E.64 R54, [R12.64] ;
/*3810*/ LD.E.64 R60, [R40.64] ;
/*3820*/ LD.E.64 R58, [R40.64+0x8] ;
/*3830*/ LD.E.64 R56, [R40.64+0x10] ;
/*3840*/ ST.E.64 [R42.64+0x10], R38 ;
/*3850*/ LD.E.64 R70, [R10.64+0x18] ;
/*3860*/ IMAD R45, R49, 0x18, RZ ;
/*3870*/ IMAD.WIDE.U32 R48, R48, 0x18, R20 ;
/*3880*/ LEA R92, P1, R70, R14, 0x3 ;
/*3890*/ LEA.HI.X R93, R70, R15, R71, 0x3, P1 ;
/*38a0*/ LD.E.64 R92, [R92.64] ;
/*38b0*/ IMAD R47, R71, 0x18, RZ ;
/*38c0*/ IADD3 R46, P1, R12, R77, RZ ;
/*38d0*/ IMAD.WIDE.U32 R70, R70, 0x18, R20 ;
/*38e0*/ IADD3 R49, R49, R45, RZ ;
/*38f0*/ IMAD.IADD R71, R71, 0x1, R47 ;
/*3900*/ IMAD.X R47, R13, 0x1, R9, P1 ;
/*3910*/ IADD3 R38, P1, R46, R77, RZ ;
/*3920*/ LD.E.64 R50, [R48.64+0x8] ;
/*3930*/ IADD3.X R39, R47, R9, RZ, P1, !PT ;
/*3940*/ LD.E.64 R52, [R48.64] ;
/*3950*/ LD.E.64 R46, [R46.64] ;
/*3960*/ LD.E.64 R48, [R48.64+0x10] ;
/*3970*/ ST.E.64 [R42.64+0x18], R92 ;
/*3980*/ LD.E.64 R12, [R38.64] ;
/*3990*/ LD.E.64 R44, [R70.64+0x8] ;
/*39a0*/ LD.E.64 R40, [R70.64] ;
/*39b0*/ LD.E.64 R72, [R70.64+0x10] ;
/*39c0*/ IADD3 R89, P1, R89, 0x4, RZ ;
/*39d0*/ DFMA R66, R62, R66, R18 ;
/*39e0*/ DFMA R68, R62.reuse, R68, R16 ;
/*39f0*/ IMAD.X R5, RZ, RZ, R5, P1 ;
/*3a00*/ ISETP.GE.U32.AND P1, PT, R89, R90, PT ;
/*3a10*/ DFMA R64, R62, R64, R2 ;
/*3a20*/ DFMA R58, R54, R58, R66 ;
/*3a30*/ ISETP.GE.U32.AND.EX P1, PT, R5, R88, PT, P1 ;
/*3a40*/ DFMA R60, R54, R60, R68 ;
/*3a50*/ DFMA R56, R54, R56, R64 ;
/*3a60*/ IADD3 R54, P2, R42, 0x20, RZ ;
/*3a70*/ DFMA R50, R46, R50, R58 ;
/*3a80*/ DFMA R52, R46, R52, R60 ;
/*3a90*/ IMAD.X R55, RZ, RZ, R43, P2 ;
/*3aa0*/ DFMA R48, R46, R48, R56 ;
/*3ab0*/ IADD3 R60, P2, R10, 0x20, RZ ;
/*3ac0*/ IADD3.X R61, RZ, R11, RZ, P2, !PT ;
/*3ad0*/ DFMA R18, R12.reuse, R44, R50 ;
/*3ae0*/ IADD3 R44, P3, R38, R77, RZ ;
/*3af0*/ DFMA R16, R12, R40, R52 ;
/*3b00*/ DFMA R2, R12, R72, R48 ;
/*3b10*/ IMAD.X R45, R39, 0x1, R9, P3 ;
/*3b20*/ @!P1 BRA `(.L_x_656) ;
.L_x_655:
/*3b30*/ BSYNC B1 ;
.L_x_654:
/*3b40*/ IADD3 R10, P3, R82.reuse, -R89.reuse, RZ ;
/*3b50*/ BSSY B1, `(.L_x_657) ;
/*3b60*/ ISETP.GT.U32.AND P2, PT, R82, R89, PT ;
/*3b70*/ ISETP.LE.U32.AND P1, PT, R10, 0x1, PT ;
/*3b80*/ IMAD.X R10, R86.reuse, 0x1, ~R5, P3 ;
/*3b90*/ ISETP.GT.U32.AND.EX P2, PT, R86, R5, PT, P2 ;
/*3ba0*/ ISETP.LE.U32.OR.EX P1, PT, R10, RZ, !P2, P1 ;
/*3bb0*/ @P1 BRA `(.L_x_658) ;
/*3bc0*/ ULDC.64 UR4, c[0x0][0x118] ;
/*3bd0*/ LD.E.64 R58, [R60.64] ;
/*3be0*/ LEA R12, P0, R58, R14, 0x3 ;
/*3bf0*/ LEA.HI.X R13, R58, R15, R59, 0x3, P0 ;
/*3c00*/ LD.E.64 R12, [R12.64] ;
/*3c10*/ ST.E.64 [R54.64], R12 ;
/*3c20*/ LD.E.64 R52, [R60.64+0x8] ;
/*3c30*/ IMAD R11, R59, 0x18, RZ ;
/*3c40*/ IMAD.WIDE.U32 R58, R58, 0x18, R20 ;
/*3c50*/ LD.E.64 R50, [R44.64] ;
/*3c60*/ LEA R56, P0, R52, R14, 0x3 ;
/*3c70*/ LEA.HI.X R57, R52, R15, R53, 0x3, P0 ;
/*3c80*/ LD.E.64 R56, [R56.64] ;
/*3c90*/ IADD3 R10, P0, R44, R77, RZ ;
/*3ca0*/ IMAD.IADD R59, R59, 0x1, R11 ;
/*3cb0*/ IMAD R39, R53, 0x18, RZ ;
/*3cc0*/ IMAD.WIDE.U32 R52, R52, 0x18, R20 ;
/*3cd0*/ LD.E.64 R48, [R58.64] ;
/*3ce0*/ IMAD.X R11, R45, 0x1, R9, P0 ;
/*3cf0*/ LD.E.64 R46, [R58.64+0x8] ;
/*3d00*/ IMAD.IADD R53, R53, 0x1, R39 ;
/*3d10*/ LD.E.64 R44, [R58.64+0x10] ;
/*3d20*/ ST.E.64 [R54.64+0x8], R56 ;
/*3d30*/ LD.E.64 R42, [R10.64] ;
/*3d40*/ LD.E.64 R12, [R52.64] ;
/*3d50*/ LD.E.64 R38, [R52.64+0x8] ;
/*3d60*/ LD.E.64 R40, [R52.64+0x10] ;
/*3d70*/ IADD3 R89, P1, R89, 0x2, RZ ;
/*3d80*/ DFMA R16, R50, R48, R16 ;
/*3d90*/ IADD3 R54, P2, R54, 0x10, RZ ;
/*3da0*/ DFMA R18, R50, R46, R18 ;
/*3db0*/ IADD3.X R5, RZ, R5, RZ, P1, !PT ;
/*3dc0*/ DFMA R2, R50, R44, R2 ;
/*3dd0*/ IADD3 R60, P3, R60, 0x10, RZ ;
/*3de0*/ IADD3 R44, P1, R10, R77, RZ ;
/*3df0*/ IMAD.X R55, RZ, RZ, R55, P2 ;
/*3e00*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*3e10*/ IADD3.X R61, RZ, R61, RZ, P3, !PT ;
/*3e20*/ IMAD.X R45, R11, 0x1, R9, P1 ;
/*3e30*/ DFMA R16, R42, R12, R16 ;
/*3e40*/ DFMA R18, R42, R38, R18 ;
/*3e50*/ DFMA R2, R42, R40, R2 ;
.L_x_658:
/*3e60*/ BSYNC B1 ;
.L_x_657:
/*3e70*/ ISETP.LT.U32.AND P1, PT, R89, R82, PT ;
/*3e80*/ ULDC.64 UR4, c[0x0][0x118] ;
/*3e90*/ ISETP.LT.U32.OR.EX P0, PT, R5, R86, P0, P1 ;
/*3ea0*/ @P0 LD.E.64 R10, [R60.64] ;
/*3eb0*/ @P0 LEA R12, P1, R10, R14, 0x3 ;
/*3ec0*/ @P0 LEA.HI.X R13, R10, R15, R11, 0x3, P1 ;
/*3ed0*/ @P0 LD.E.64 R12, [R12.64] ;
/*3ee0*/ @P0 IMAD R5, R11, 0x18, RZ ;
/*3ef0*/ @P0 IMAD.WIDE.U32 R10, R10, 0x18, R20 ;
/*3f00*/ @P0 IMAD.IADD R11, R11, 0x1, R5 ;
/*3f10*/ IADD3 R7, P1, R34, R7, RZ ;
/*3f20*/ IMAD.X R5, R81, 0x1, R6, P1 ;
/*3f30*/ ISETP.LT.U32.AND P1, PT, R7, R0, PT ;
/*3f40*/ @P0 ST.E.64 [R54.64], R12 ;
/*3f50*/ @P0 LD.E.64 R38, [R44.64] ;
/*3f60*/ @P0 LD.E.64 R40, [R10.64] ;
/*3f70*/ @P0 LD.E.64 R42, [R10.64+0x8] ;
/*3f80*/ @P0 LD.E.64 R44, [R10.64+0x10] ;
/*3f90*/ ISETP.LT.U32.AND.EX P1, PT, R5, R4, PT, P1 ;
/*3fa0*/ IADD3 R85, P3, R34, R85, RZ ;
/*3fb0*/ SEL R7, R7, R0, P1 ;
/*3fc0*/ SEL R6, R5, R4, P1 ;
/*3fd0*/ IMAD.X R84, R81, 0x1, R84, P3 ;
/*3fe0*/ IADD3 R82, P2, R7, 0x1, RZ ;
/*3ff0*/ IADD3 R8, P3, R8, R36, RZ ;
/*4000*/ IADD3 R75, P1, R75, R80.reuse, RZ ;
/*4010*/ IMAD.X R86, RZ, RZ, R6, P2 ;
/*4020*/ IADD3 R74, P2, R74, R80, RZ ;
/*4030*/ IMAD.X R76, R76, 0x1, R87, P3 ;
/*4040*/ IMAD.X R79, R79, 0x1, R83.reuse, P1 ;
/*4050*/ IMAD.X R78, R78, 0x1, R83, P2 ;
/*4060*/ @P0 DFMA R16, R38, R40, R16 ;
/*4070*/ @P0 DFMA R18, R38, R42, R18 ;
/*4080*/ @P0 DFMA R2, R38, R44, R2 ;
/*4090*/ ISETP.GE.U32.AND P0, PT, R85, R82, PT ;
/*40a0*/ ISETP.GE.U32.AND.EX P0, PT, R84, R86, PT, P0 ;
/*40b0*/ @!P0 BRA `(.L_x_659) ;
.L_x_651:
/*40c0*/ BSYNC B0 ;
.L_x_650:
/*40d0*/ STL.64 [R1], R16 ;
/*40e0*/ ISETP.NE.AND P0, PT, R34, 0x1, PT ;
/*40f0*/ BSSY B8, `(.L_x_649) ;
/*4100*/ IMAD.MOV.U32 R36, RZ, RZ, R2 ;
/*4110*/ STL.64 [R1+0x8], R18 ;
/*4120*/ MOV R37, R3 ;
/*4130*/ STL.64 [R1+0x10], R2 ;
/*4140*/ @!P0 BRA `(.L_x_660) ;
/*4150*/ IADD3 R43, R34.reuse, 0x1f, RZ ;
/*4160*/ BSSY B7, `(.L_x_661) ;
/*4170*/ LOP3.LUT P0, R45, R34, 0x1f, RZ, 0xc0, !PT ;
/*4180*/ SHF.R.U32.HI R44, RZ, 0x5, R43 ;
/*4190*/ IADD3 R3, R44, -0x1, RZ ;
/*41a0*/ ISETP.GE.U32.AND P1, PT, R26, R3, PT ;
/*41b0*/ @P0 BRA P1, `(.L_x_662) ;
/*41c0*/ BRA.DIV ~URZ, `(.L_x_663) ;
/*41d0*/ SHFL.DOWN PT, R3, R17, 0x10, 0x1f ;
/*41e0*/ SHFL.DOWN PT, R2, R16, 0x10, 0x1f ;
/*41f0*/ SHFL.DOWN PT, R5, R19, 0x10, 0x1f ;
/*4200*/ SHFL.DOWN PT, R4, R18, 0x10, 0x1f ;
/*4210*/ SHFL.DOWN PT, R7, R37, 0x10, 0x1f ;
/*4220*/ SHFL.DOWN PT, R6, R36, 0x10, 0x1f ;
/*4230*/ DADD R2, R16, R2 ;
/*4240*/ SHFL.DOWN PT, R9, R3, 0x8, 0x1f ;
/*4250*/ DADD R4, R18, R4 ;
/*4260*/ SHFL.DOWN PT, R8, R2, 0x8, 0x1f ;
/*4270*/ SHFL.DOWN PT, R11, R5, 0x8, 0x1f ;
/*4280*/ DADD R6, R6, R36 ;
/*4290*/ SHFL.DOWN PT, R10, R4, 0x8, 0x1f ;
/*42a0*/ SHFL.DOWN PT, R13, R7, 0x8, 0x1f ;
/*42b0*/ SHFL.DOWN PT, R12, R6, 0x8, 0x1f ;
/*42c0*/ DADD R8, R2, R8 ;
/*42d0*/ SHFL.DOWN PT, R15, R9, 0x4, 0x1f ;
/*42e0*/ DADD R10, R4, R10 ;
/*42f0*/ SHFL.DOWN PT, R14, R8, 0x4, 0x1f ;
/*4300*/ SHFL.DOWN PT, R17, R11, 0x4, 0x1f ;
/*4310*/ DADD R12, R6, R12 ;
/*4320*/ SHFL.DOWN PT, R16, R10, 0x4, 0x1f ;
/*4330*/ SHFL.DOWN PT, R3, R13, 0x4, 0x1f ;
/*4340*/ SHFL.DOWN PT, R2, R12, 0x4, 0x1f ;
/*4350*/ DADD R14, R8, R14 ;
/*4360*/ SHFL.DOWN PT, R7, R15, 0x2, 0x1f ;
/*4370*/ DADD R4, R10, R16 ;
/*4380*/ SHFL.DOWN PT, R6, R14, 0x2, 0x1f ;
/*4390*/ SHFL.DOWN PT, R19, R5, 0x2, 0x1f ;
/*43a0*/ DADD R2, R12, R2 ;
/*43b0*/ SHFL.DOWN PT, R18, R4, 0x2, 0x1f ;
/*43c0*/ SHFL.DOWN PT, R37, R3, 0x2, 0x1f ;
/*43d0*/ SHFL.DOWN PT, R36, R2, 0x2, 0x1f ;
/*43e0*/ DADD R40, R14, R6 ;
/*43f0*/ SHFL.DOWN PT, R17, R41, 0x1, 0x1f ;
/*4400*/ DADD R18, R4, R18 ;
/*4410*/ SHFL.DOWN PT, R16, R40, 0x1, 0x1f ;
/*4420*/ SHFL.DOWN PT, R39, R19, 0x1, 0x1f ;
/*4430*/ DADD R36, R2, R36 ;
/*4440*/ SHFL.DOWN PT, R38, R18, 0x1, 0x1f ;
/*4450*/ SHFL.DOWN PT, R23, R37, 0x1, 0x1f ;
/*4460*/ SHFL.DOWN PT, R4, R36, 0x1, 0x1f ;
.L_x_769:
/*4470*/ MOV R5, R23 ;
/*4480*/ DADD R16, R16, R40 ;
/*4490*/ DADD R18, R38, R18 ;
/*44a0*/ STL.64 [R1], R16 ;
/*44b0*/ DADD R36, R4, R36 ;
/*44c0*/ STL.64 [R1+0x8], R18 ;
/*44d0*/ STL.64 [R1+0x10], R36 ;
/*44e0*/ BRA `(.L_x_664) ;
.L_x_662:
/*44f0*/ ISETP.GE.U32.AND P0, PT, R45, 0x2, PT ;
/*4500*/ @!P0 BRA `(.L_x_664) ;
/*4510*/ BSSY B6, `(.L_x_665) ;
/*4520*/ SHF.R.U32.HI R23, RZ, 0x1, R45 ;
.L_x_667:
/*4530*/ BRA.DIV ~URZ, `(.L_x_666) ;
/*4540*/ SHFL.DOWN PT, R41, R17, R23, 0x1f ;
/*4550*/ SHFL.DOWN PT, R40, R16, R23, 0x1f ;
/*4560*/ SHFL.DOWN PT, R39, R19, R23, 0x1f ;
/*4570*/ SHFL.DOWN PT, R38, R18, R23, 0x1f ;
/*4580*/ SHFL.DOWN PT, R42, R37, R23, 0x1f ;
/*4590*/ SHFL.DOWN PT, R2, R36, R23, 0x1f ;
.L_x_770:
/*45a0*/ LOP3.LUT R0, R23, 0xffff, RZ, 0xc0, !PT ;
/*45b0*/ IMAD.MOV.U32 R3, RZ, RZ, R42 ;
/*45c0*/ IADD3 R23, R45, 0x1, RZ ;
/*45d0*/ ISETP.GE.U32.AND P0, PT, R27, R0, PT ;
/*45e0*/ SHF.R.U32.HI R0, RZ, 0x1, R23.reuse ;
/*45f0*/ SHF.R.U32.HI R23, RZ, 0x2, R23 ;
/*4600*/ @!P0 DADD R40, R40, R16 ;
/*4610*/ @!P0 DADD R38, R38, R18 ;
/*4620*/ @!P0 DADD R2, R2, R36 ;
/*4630*/ ISETP.GT.U32.AND P0, PT, R45, 0x2, PT ;
/*4640*/ IMAD.MOV.U32 R45, RZ, RZ, R0 ;
/*4650*/ MOV R16, R40 ;
/*4660*/ IMAD.MOV.U32 R17, RZ, RZ, R41 ;
/*4670*/ IMAD.MOV.U32 R18, RZ, RZ, R38 ;
/*4680*/ IMAD.MOV.U32 R19, RZ, RZ, R39 ;
/*4690*/ IMAD.MOV.U32 R36, RZ, RZ, R2 ;
/*46a0*/ MOV R37, R3 ;
/*46b0*/ @P0 BRA `(.L_x_667) ;
/*46c0*/ BSYNC B6 ;
.L_x_665:
/*46d0*/ STL.64 [R1+0x10], R2 ;
/*46e0*/ IMAD.MOV.U32 R18, RZ, RZ, R38 ;
/*46f0*/ MOV R16, R40 ;
/*4700*/ IMAD.MOV.U32 R19, RZ, RZ, R39 ;
/*4710*/ STL.64 [R1], R40 ;
/*4720*/ MOV R17, R41 ;
/*4730*/ STL.64 [R1+0x8], R38 ;
.L_x_664:
/*4740*/ BSYNC B7 ;
.L_x_661:
/*4750*/ ISETP.GE.U32.AND P0, PT, R34, 0x21, PT ;
/*4760*/ @!P0 BRA `(.L_x_668) ;
/*4770*/ BSSY B7, `(.L_x_669) ;
/*4780*/ MOV R2, RZ ;
.L_x_682:
/*4790*/ LDS R0, [`(IsSPMDMode)] ;
/*47a0*/ YIELD (*"RELOCATOR OPCODE,YIELD,280"*);
/*47b0*/ BSSY B6, `(.L_x_670) ;
/*47c0*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*47d0*/ @P0 BRA `(.L_x_671) ;
/*47e0*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*47f0*/ ISETP.NE.AND P1, PT, R33, R28, PT ;
/*4800*/ ISETP.NE.OR P0, PT, R0, RZ, P1 ;
/*4810*/ @P0 BRA `(.L_x_672) ;
/*4820*/ @!PT LDS RZ, [RZ] ;
/*4830*/ @!PT LDS RZ, [RZ] ;
/*4840*/ @!PT LDS RZ, [RZ] ;
/*4850*/ @!PT LDS RZ, [RZ] ;
/*4860*/ MEMBAR.SC.GPU ;
/*4870*/ ERRBAR;
/*4880*/ CCTL.IVALL ;
/*4890*/ BRA `(.L_x_673) ;
.L_x_672:
/*48a0*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*48b0*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*48c0*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*48d0*/ SEL R5, R22, R3, !P1 ;
/*48e0*/ SEL R5, R5, 0x1, !P0 ;
/*48f0*/ BRA.DIV ~URZ, `(.L_x_674) ;
/*4900*/ BAR.SYNC 0x7, R5 ;
/*4910*/ BRA `(.L_x_673) ;
.L_x_671:
/*4920*/ WARPSYNC 0xffffffff ;
/*4930*/ BAR.SYNC 0x0 ;
.L_x_673:
/*4940*/ BSYNC B6 ;
.L_x_670:
/*4950*/ ISETP.NE.AND P0, PT, R35, RZ, PT ;
/*4960*/ BSSY B0, `(.L_x_675) ;
/*4970*/ IADD3 R23, R1, R2, RZ ;
/*4980*/ @P0 BRA `(.L_x_676) ;
/*4990*/ LDL R3, [R23] ;
/*49a0*/ STS [R24+`(__openmp_nvptx_data_transfer_temporary_storage)], R3 ;
.L_x_676:
/*49b0*/ BSYNC B0 ;
.L_x_675:
/*49c0*/ LDS R0, [`(IsSPMDMode)] ;
/*49d0*/ BSSY B6, `(.L_x_677) ;
/*49e0*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*49f0*/ @P0 BRA `(.L_x_678) ;
/*4a00*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*4a10*/ ISETP.NE.AND P1, PT, R33, R28, PT ;
/*4a20*/ ISETP.NE.OR P0, PT, R0, RZ, P1 ;
/*4a30*/ @P0 BRA `(.L_x_679) ;
/*4a40*/ @!PT LDS RZ, [RZ] ;
/*4a50*/ @!PT LDS RZ, [RZ] ;
/*4a60*/ @!PT LDS RZ, [RZ] ;
/*4a70*/ @!PT LDS RZ, [RZ] ;
/*4a80*/ MEMBAR.SC.GPU ;
/*4a90*/ ERRBAR;
/*4aa0*/ CCTL.IVALL ;
/*4ab0*/ BRA `(.L_x_680) ;
.L_x_679:
/*4ac0*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*4ad0*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*4ae0*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*4af0*/ SEL R5, R22, R3, !P1 ;
/*4b00*/ SEL R5, R5, 0x1, !P0 ;
/*4b10*/ BRA.DIV ~URZ, `(.L_x_681) ;
/*4b20*/ BAR.SYNC 0x7, R5 ;
/*4b30*/ BRA `(.L_x_680) ;
.L_x_678:
/*4b40*/ WARPSYNC 0xffffffff ;
/*4b50*/ BAR.SYNC 0x0 ;
.L_x_680:
/*4b60*/ BSYNC B6 ;
.L_x_677:
/*4b70*/ ISETP.GE.U32.AND P0, PT, R33, R44, PT ;
/*4b80*/ IADD3 R2, R2, 0x4, RZ ;
/*4b90*/ @!P0 LDS R0, [R25+`(__openmp_nvptx_data_transfer_temporary_storage)] ;
/*4ba0*/ @!P0 STL [R23], R0 ;
/*4bb0*/ ISETP.NE.AND P0, PT, R2, 0x18, PT ;
/*4bc0*/ @P0 BRA `(.L_x_682) ;
/*4bd0*/ BSYNC B7 ;
.L_x_669:
/*4be0*/ ISETP.GE.U32.AND P0, PT, R43, 0x40, PT ;
/*4bf0*/ BSSY B7, `(.L_x_668) ;
/*4c00*/ ISETP.GT.U32.OR P0, PT, R33, 0x1f, !P0 ;
/*4c10*/ @P0 BRA `(.L_x_683) ;
/*4c20*/ LDL.64 R16, [R1+0x10] ;
/*4c30*/ LDL.64 R18, [R1] ;
/*4c40*/ LDL.64 R22, [R1+0x8] ;
/*4c50*/ BSSY B6, `(.L_x_684) ;
/*4c60*/ SHF.R.U32.HI R2, RZ, 0x6, R43 ;
.L_x_686:
/*4c70*/ PRMT R41, R2, 0x9910, RZ ;
/*4c80*/ BRA.DIV ~URZ, `(.L_x_685) ;
/*4c90*/ SHFL.DOWN PT, R39, R19, R41, 0x1f ;
/*4ca0*/ SHFL.DOWN PT, R38, R18, R41, 0x1f ;
/*4cb0*/ SHFL.DOWN PT, R37, R23, R41, 0x1f ;
/*4cc0*/ SHFL.DOWN PT, R36, R22, R41, 0x1f ;
/*4cd0*/ SHFL.DOWN PT, R34, R17, R41, 0x1f ;
/*4ce0*/ SHFL.DOWN PT, R4, R16, R41, 0x1f ;
.L_x_771:
/*4cf0*/ LOP3.LUT R2, R2, 0xffff, RZ, 0xc0, !PT ;
/*4d00*/ IMAD.MOV.U32 R5, RZ, RZ, R34 ;
/*4d10*/ LOP3.LUT R3, R33, 0xffff, RZ, 0xc0, !PT ;
/*4d20*/ ISETP.GE.U32.AND P0, PT, R3, R2, PT ;
/*4d30*/ IADD3 R2, R44, 0x1, RZ ;
/*4d40*/ SHF.R.U32.HI R0, RZ, 0x1, R2.reuse ;
/*4d50*/ SHF.R.U32.HI R2, RZ, 0x2, R2 ;
/*4d60*/ @!P0 DADD R38, R38, R18 ;
/*4d70*/ @!P0 DADD R36, R36, R22 ;
/*4d80*/ @!P0 DADD R4, R4, R16 ;
/*4d90*/ ISETP.GT.U32.AND P0, PT, R44, 0x2, PT ;
/*4da0*/ IMAD.MOV.U32 R44, RZ, RZ, R0 ;
/*4db0*/ MOV R19, R39 ;
/*4dc0*/ IMAD.MOV.U32 R18, RZ, RZ, R38 ;
/*4dd0*/ IMAD.MOV.U32 R22, RZ, RZ, R36 ;
/*4de0*/ IMAD.MOV.U32 R23, RZ, RZ, R37 ;
/*4df0*/ MOV R16, R4 ;
/*4e00*/ IMAD.MOV.U32 R17, RZ, RZ, R5 ;
/*4e10*/ @P0 BRA `(.L_x_686) ;
/*4e20*/ BSYNC B6 ;
.L_x_684:
/*4e30*/ STL.64 [R1+0x10], R4 ;
/*4e40*/ IMAD.MOV.U32 R18, RZ, RZ, R22 ;
/*4e50*/ MOV R16, R38 ;
/*4e60*/ IMAD.MOV.U32 R19, RZ, RZ, R23 ;
/*4e70*/ MOV R17, R39 ;
.L_x_683:
/*4e80*/ BSYNC B7 ;
.L_x_668:
/*4e90*/ ISETP.NE.AND P0, PT, R33, RZ, PT ;
/*4ea0*/ P2R R23, PR, RZ, 0x1 ;
/*4eb0*/ @P0 BRA `(.L_x_687) ;
/*4ec0*/ LDL.64 R36, [R1+0x10] ;
.L_x_660:
/*4ed0*/ LDS.64 R2, [`($__ratioGradRef_local_shared__2701)] ;
/*4ee0*/ LDS.64 R6, [`(($__ratioGradRef_local_shared__2701 + 0x8))] ;
/*4ef0*/ LDS.64 R4, [`(($__ratioGradRef_local_shared__2701 + 0x10))] ;
/*4f00*/ DADD R2, R2, R16 ;
/*4f10*/ DADD R18, R18, R6 ;
/*4f20*/ STS.64 [`($__ratioGradRef_local_shared__2701)], R2 ;
/*4f30*/ DADD R4, R4, R36 ;
/*4f40*/ STS.64 [`(($__ratioGradRef_local_shared__2701 + 0x8))], R18 ;
/*4f50*/ STS.64 [`(($__ratioGradRef_local_shared__2701 + 0x10))], R4 ;
.L_x_687:
/*4f60*/ BSYNC B8 ;
.L_x_649:
/*4f70*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ;
/*4f80*/ BRA `(.L_x_647) ;
.L_x_648:
/*4f90*/ MOV R0, 0x1 ;
/*4fa0*/ STS [`(_ZN4ompx5state9TeamStateE_$_2)], R3 ;
/*4fb0*/ STS [`(_ZN4ompx5state9TeamStateE_$_1)], R0 ;
/*4fc0*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], R0 ;
/*4fd0*/ BAR.SYNC 0x8 ;
/*4fe0*/ BAR.SYNC 0x8 ;
/*4ff0*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ;
/*5000*/ STS [`(_ZN4ompx5state9TeamStateE_$_1)], RZ ;
/*5010*/ STS [`(_ZN4ompx5state9TeamStateE_$_2)], R0 ;
.L_x_647:
/*5020*/ LDS.128 R4, [`($__iw_shared5_$_0__2697)] ;
/*5030*/ IMAD.MOV.U32 R3, RZ, RZ, 0x18 ;
/*5040*/ ISETP.NE.AND P0, PT, R23, RZ, PT ;
/*5050*/ ULDC.64 UR4, c[0x0][0x118] ;
/*5060*/ LDS.64 R8, [`(($__ratioGradRef_local_shared__2701 + 0x10))] ;
/*5070*/ LDS.64 R10, [`($__ratioGradRef_local_shared__2701)] ;
/*5080*/ @!P0 STS.64 [`($__c_ratio_shared8_$_0__2711)], RZ ;
/*5090*/ IMAD.WIDE.U32 R2, R4, R3, c[0x0][0x1a8] ;
/*50a0*/ IMAD R5, R5, 0x18, RZ ;
/*50b0*/ IMAD.IADD R3, R3, 0x1, R5 ;
/*50c0*/ STG.E.64 [R2.64+0x8], R6 ;
/*50d0*/ STG.E.64 [R2.64+0x10], R8 ;
/*50e0*/ STG.E.64 [R2.64], R10 ;
/*50f0*/ WARPSYNC 0xffffffff ;
/*5100*/ ULDC UR4, c[0x0][0x0] ;
/*5110*/ BAR.SYNC 0x0 ;
/*5120*/ LDS R0, [`(IsSPMDMode)] ;
/*5130*/ UIADD3 UR4, UR4, -0x20, URZ ;
/*5140*/ IMAD.U32 R22, RZ, RZ, UR4 ;
/*5150*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*5160*/ IMAD.MOV.U32 R0, RZ, RZ, R22 ;
/*5170*/ @P0 IMAD.MOV R0, RZ, RZ, c[0x0][0x0] ;
/*5180*/ ISETP.GE.U32.AND P1, PT, R0.reuse, 0x20, PT ;
/*5190*/ LOP3.LUT R2, R0, 0xffffffe0, RZ, 0xc0, !PT ;
/*51a0*/ SEL R2, R2, 0x1, P1 ;
/*51b0*/ SEL R3, R2, c[0x0][0x0], !P0 ;
/*51c0*/ ISETP.NE.AND P1, PT, R3, R0, PT ;
/*51d0*/ SEL R3, R3, RZ, P1 ;
/*51e0*/ @!P0 BRA `(.L_x_688) ;
/*51f0*/ BAR.SYNC 0x0 ;
/*5200*/ ISETP.NE.AND P0, PT, R33, RZ, PT ;
/*5210*/ BSSY B9, `(.L_x_689) ;
/*5220*/ P2R R0, PR, RZ, 0x1 ;
/*5230*/ IADD3 R0, R3, -0x1, RZ ;
/*5240*/ @!P0 IMAD.MOV.U32 R2, RZ, RZ, 0x1 ;
/*5250*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_2)], R3 ;
/*5260*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_1)], R2 ;
/*5270*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_0)], R2 ;
/*5280*/ BAR.SYNC 0x0 ;
/*5290*/ BAR.SYNC 0x0 ;
/*52a0*/ ISETP.GE.U32.AND P0, PT, R0, R33, PT ;
/*52b0*/ @!P0 BRA `(.L_x_690) ;
/*52c0*/ LDS.64 R4, [`($__psiMinv_cols6_shared_$_0__2705)] ;
/*52d0*/ ISETP.NE.U32.AND P0, PT, R4, RZ, PT ;
/*52e0*/ ISETP.NE.AND.EX P0, PT, R5, RZ, PT, P0 ;
/*52f0*/ @!P0 BRA `(.L_x_690) ;
/*5300*/ LDS R2, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*5310*/ IADD3 R0, P1, R4, -0x1, RZ ;
/*5320*/ IMAD.MOV.U32 R23, RZ, RZ, 0x1 ;
/*5330*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_1)] ;
/*5340*/ ISETP.NE.AND P2, PT, R2.reuse, 0x1, PT ;
/*5350*/ ISETP.NE.AND P3, PT, R2, RZ, PT ;
/*5360*/ ISETP.NE.AND P0, PT, R3, R2, PT ;
/*5370*/ IADD3.X R3, R5, -0x1, RZ, P1, !PT ;
/*5380*/ SEL R6, R33, RZ, !P0 ;
/*5390*/ P2R R50, PR, RZ, 0x8 ;
/*53a0*/ SEL R34, R6, RZ, P3 ;
/*53b0*/ @P2 BRA `(.L_x_691) ;
/*53c0*/ LDS R23, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*53d0*/ ISETP.NE.AND P0, PT, R23, RZ, PT ;
/*53e0*/ @!P0 LDS R6, [`(IsSPMDMode)] ;
/*53f0*/ ISETP.NE.AND P1, PT, R6, RZ, !P0 ;
/*5400*/ @!P0 IMAD.MOV.U32 R6, RZ, RZ, R22 ;
/*5410*/ @P1 IADD3 R6, RZ, c[0x0][0x0], RZ ;
/*5420*/ @!P0 IMAD.MOV.U32 R23, RZ, RZ, R6 ;
.L_x_691:
/*5430*/ ISETP.GE.U32.AND P0, PT, R0, R34, PT ;
/*5440*/ BSSY B0, `(.L_x_692) ;
/*5450*/ SHF.R.S32.HI R38, RZ, 0x1f, R34 ;
/*5460*/ CS2R R16, SRZ ;
/*5470*/ ISETP.GE.U32.AND.EX P0, PT, R3, R38, PT, P0 ;
/*5480*/ @!P0 BRA `(.L_x_693) ;
/*5490*/ LDS.64 R8, [`($__iw_shared5_$_0__2697)] ;
/*54a0*/ ULDC.64 UR4, c[0x0][0x118] ;
/*54b0*/ LDS.64 R6, [`($__psiMinv_temp_list_devptr5_shared_$_0__2703)] ;
/*54c0*/ LDS.64 R10, [`($__psiV_temp_list_ptr3_shared4_$_0__2695)] ;
/*54d0*/ IMAD.SHL.U32 R47, R8.reuse, 0x8, RZ ;
/*54e0*/ SHF.L.U64.HI R9, R8, 0x3, R9 ;
/*54f0*/ IADD3 R44, P0, R6, R47.reuse, RZ ;
/*5500*/ LDS R6, [`($__WorkingIndex7_shared7_$_0__2707)] ;
/*5510*/ IADD3 R46, P1, R10, R47, RZ ;
/*5520*/ IMAD.X R45, R7, 0x1, R9, P0 ;
/*5530*/ IADD3.X R47, R11, R9, RZ, P1, !PT ;
/*5540*/ LD.E.64 R44, [R44.64] ;
/*5550*/ LD.E.64 R46, [R46.64] ;
/*5560*/ SHF.L.U64.HI R39, R4.reuse, 0x3, R5 ;
/*5570*/ IMAD.SHL.U32 R41, R4, 0x8, RZ ;
/*5580*/ SHF.R.S32.HI R42, RZ, 0x1f, R23 ;
/*5590*/ CS2R R16, SRZ ;
/*55a0*/ SHF.L.U32 R49, R23, 0x3, RZ ;
/*55b0*/ IMAD R4, R39, R34, RZ ;
/*55c0*/ SHF.L.U64.HI R51, R23, 0x3, R42 ;
/*55d0*/ IMAD.MOV.U32 R48, RZ, RZ, R34 ;
/*55e0*/ IMAD R5, R41, R38, R4 ;
/*55f0*/ IMAD.WIDE R6, R6, 0x8, RZ ;
/*5600*/ IMAD.WIDE.U32 R6, R41, R34, R6 ;
/*5610*/ IADD3 R43, P1, R44, R6, RZ ;
/*5620*/ IMAD R6, R39, R23, RZ ;
/*5630*/ LEA R40, P0, R34.reuse, R46, 0x3 ;
/*5640*/ IADD3.X R44, R45, R7, R5, P1, !PT ;
/*5650*/ IMAD.WIDE.U32 R4, R41, R23, RZ ;
/*5660*/ LEA.HI.X R46, R34.reuse, R47, R38, 0x3, P0 ;
/*5670*/ IADD3 R63, P0, R34, 0x1, RZ ;
/*5680*/ IMAD R47, R42, R41, R6 ;
/*5690*/ MOV R45, R38 ;
/*56a0*/ IMAD.X R62, RZ, RZ, R38, P0 ;
/*56b0*/ IADD3 R47, R5, R47, RZ ;
.L_x_697:
/*56c0*/ ISETP.GE.U32.AND P3, PT, R48, R63, PT ;
/*56d0*/ IMAD.MOV.U32 R56, RZ, RZ, R40 ;
/*56e0*/ MOV R52, R43 ;
/*56f0*/ IMAD.MOV.U32 R57, RZ, RZ, R46 ;
/*5700*/ ISETP.GE.U32.AND.EX P3, PT, R45, R62, PT, P3 ;
/*5710*/ IMAD.MOV.U32 R15, RZ, RZ, R44 ;
/*5720*/ ULDC.64 UR4, c[0x0][0x118] ;
/*5730*/ IMAD.MOV.U32 R60, RZ, RZ, R48 ;
/*5740*/ IMAD.MOV.U32 R61, RZ, RZ, R45 ;
/*5750*/ @P3 IMAD.MOV.U32 R8, RZ, RZ, R56 ;
/*5760*/ @P3 IADD3 R60, P0, R60, 0x1, RZ ;
/*5770*/ @P3 IMAD.MOV.U32 R9, RZ, RZ, R57 ;
/*5780*/ @P3 IMAD.MOV.U32 R6, RZ, RZ, R52 ;
/*5790*/ @P3 IADD3.X R61, RZ, R61, RZ, P0, !PT ;
/*57a0*/ @P3 IMAD.MOV.U32 R7, RZ, RZ, R15 ;
/*57b0*/ @P3 IADD3 R56, P0, R56, 0x8, RZ ;
/*57c0*/ @P3 LD.E.64 R8, [R8.64] ;
/*57d0*/ @P3 LD.E.64 R6, [R6.64] ;
/*57e0*/ @P3 IMAD.X R57, RZ, RZ, R57, P0 ;
/*57f0*/ @P3 IADD3 R52, P0, R52, R41, RZ ;
/*5800*/ @P3 IADD3.X R15, R15, R39, RZ, P0, !PT ;
/*5810*/ IADD3 R10, P0, R63, -R60, RZ ;
/*5820*/ ISETP.LE.U32.AND P1, PT, R10, 0x3, PT ;
/*5830*/ IMAD.X R11, R62, 0x1, ~R61, P0 ;
/*5840*/ ISETP.GT.U32.AND P0, PT, R63, R60, PT ;
/*5850*/ ISETP.GT.U32.AND.EX P0, PT, R62, R61, PT, P0 ;
/*5860*/ ISETP.LE.U32.OR.EX P0, PT, R11, RZ, !P0, P1 ;
/*5870*/ BSSY B1, `(.L_x_694) ;
/*5880*/ @P3 DFMA R16, R6, R8, R16 ;
/*5890*/ PLOP3.LUT P3, PT, P3, PT, PT, 0x8, 0x0 ;
/*58a0*/ @P0 BRA `(.L_x_695) ;
/*58b0*/ IADD3 R65, P0, R63, -0x3, RZ ;
/*58c0*/ PLOP3.LUT P3, PT, PT, PT, PT, 0x8, 0x0 ;
/*58d0*/ IADD3.X R64, R62, -0x1, RZ, P0, !PT ;
.L_x_696:
/*58e0*/ IADD3 R18, P0, R52, R41.reuse, RZ ;
/*58f0*/ IMAD.MOV.U32 R8, RZ, RZ, R56 ;
/*5900*/ ULDC.64 UR4, c[0x0][0x118] ;
/*5910*/ IMAD.MOV.U32 R9, RZ, RZ, R57 ;
/*5920*/ IMAD.MOV.U32 R20, RZ, RZ, R52 ;
/*5930*/ IMAD.MOV.U32 R21, RZ, RZ, R15 ;
/*5940*/ LD.E.64 R54, [R8.64] ;
/*5950*/ IMAD.X R19, R15, 0x1, R39, P0 ;
/*5960*/ IADD3 R58, P0, R18, R41, RZ ;
/*5970*/ LD.E.64 R52, [R20.64] ;
/*5980*/ IADD3.X R59, R19, R39, RZ, P0, !PT ;
/*5990*/ LD.E.64 R6, [R8.64+0x8] ;
/*59a0*/ LD.E.64 R18, [R18.64] ;
/*59b0*/ LD.E.64 R10, [R8.64+0x10] ;
/*59c0*/ LD.E.64 R14, [R58.64] ;
/*59d0*/ IADD3 R12, P0, R58, R41, RZ ;
/*59e0*/ LD.E.64 R20, [R8.64+0x18] ;
/*59f0*/ IMAD.X R13, R59, 0x1, R39, P0 ;
/*5a00*/ LD.E.64 R36, [R12.64] ;
/*5a10*/ IADD3 R56, P0, R8, 0x20, RZ ;
/*5a20*/ IMAD.X R57, RZ, RZ, R9, P0 ;
/*5a30*/ IADD3 R60, P0, R60, 0x4, RZ ;
/*5a40*/ IADD3.X R61, RZ, R61, RZ, P0, !PT ;
/*5a50*/ DFMA R52, R52, R54, R16 ;
/*5a60*/ DFMA R6, R18, R6, R52 ;
/*5a70*/ IADD3 R52, P0, R12, R41, RZ ;
/*5a80*/ DFMA R6, R14, R10, R6 ;
/*5a90*/ IMAD.X R15, R13, 0x1, R39, P0 ;
/*5aa0*/ ISETP.GE.U32.AND P0, PT, R60, R65, PT ;
/*5ab0*/ ISETP.GE.U32.AND.EX P0, PT, R61, R64, PT, P0 ;
/*5ac0*/ DFMA R16, R36, R20, R6 ;
/*5ad0*/ @!P0 BRA `(.L_x_696) ;
.L_x_695:
/*5ae0*/ BSYNC B1 ;
.L_x_694:
/*5af0*/ IADD3 R6, P0, R63, -R60, RZ ;
/*5b00*/ ULDC.64 UR4, c[0x0][0x118] ;
/*5b10*/ ISETP.LE.U32.AND P1, PT, R6, 0x1, PT ;
/*5b20*/ IMAD.X R7, R62, 0x1, ~R61, P0 ;
/*5b30*/ ISETP.GT.U32.AND P0, PT, R63, R60, PT ;
/*5b40*/ ISETP.GT.U32.AND.EX P0, PT, R62, R61, PT, P0 ;
/*5b50*/ ISETP.LE.U32.OR.EX P1, PT, R7, RZ, !P0, P1 ;
/*5b60*/ @!P1 IADD3 R60, P0, R60, 0x2, RZ ;
/*5b70*/ @!P1 IMAD.MOV.U32 R36, RZ, RZ, R56 ;
/*5b80*/ @!P1 PLOP3.LUT P3, PT, PT, PT, PT, 0x8, 0x0 ;
/*5b90*/ @!P1 IMAD.MOV.U32 R37, RZ, RZ, R57 ;
/*5ba0*/ @!P1 IMAD.X R61, RZ, RZ, R61, P0 ;
/*5bb0*/ @!P1 IADD3 R12, P0, R52, R41, RZ ;
/*5bc0*/ @!P1 IMAD.MOV.U32 R20, RZ, RZ, R52 ;
/*5bd0*/ @!P1 LD.E.64 R8, [R36.64] ;
/*5be0*/ @!P1 IMAD.MOV.U32 R21, RZ, RZ, R15 ;
/*5bf0*/ @!P1 IMAD.X R13, R15, 0x1, R39, P0 ;
/*5c00*/ ISETP.LT.U32.AND P0, PT, R60, R63, PT ;
/*5c10*/ @!P1 LD.E.64 R10, [R36.64+0x8] ;
/*5c20*/ ISETP.LT.U32.OR.EX P0, PT, R61, R62, P3, P0 ;
/*5c30*/ @!P1 LD.E.64 R6, [R20.64] ;
/*5c40*/ @!P1 IADD3 R56, P3, R56, 0x10, RZ ;
/*5c50*/ @!P1 IMAD.X R57, RZ, RZ, R57, P3 ;
/*5c60*/ @!P1 IADD3 R52, P3, R12, R41, RZ ;
/*5c70*/ @!P1 IMAD.X R15, R13, 0x1, R39, P3 ;
/*5c80*/ @P0 LD.E.64 R18, [R56.64] ;
/*5c90*/ @!P1 LD.E.64 R12, [R12.64] ;
/*5ca0*/ IMAD.MOV.U32 R14, RZ, RZ, R52 ;
/*5cb0*/ @P0 LD.E.64 R14, [R14.64] ;
/*5cc0*/ @!P1 DFMA R6, R6, R8, R16 ;
/*5cd0*/ @!P1 DFMA R16, R12, R10, R6 ;
/*5ce0*/ IADD3 R7, P1, R23, R34, RZ ;
/*5cf0*/ IMAD.X R38, R42, 0x1, R38, P1 ;
/*5d00*/ ISETP.LT.U32.AND P1, PT, R7, R0, PT ;
/*5d10*/ @P0 DFMA R16, R14, R18, R16 ;
/*5d20*/ IADD3 R48, P0, R23, R48, RZ ;
/*5d30*/ ISETP.LT.U32.AND.EX P1, PT, R38, R3, PT, P1 ;
/*5d40*/ SEL R34, R7, R0, P1 ;
/*5d50*/ IMAD.X R45, R42, 0x1, R45, P0 ;
/*5d60*/ SEL R38, R38, R3, P1 ;
/*5d70*/ IADD3 R40, P0, R40, R49, RZ ;
/*5d80*/ IADD3 R63, P1, R34, 0x1, RZ ;
/*5d90*/ IMAD.X R46, R46, 0x1, R51, P0 ;
/*5da0*/ ISETP.GE.U32.AND P0, PT, R48, R63, PT ;
/*5db0*/ IMAD.X R62, RZ, RZ, R38, P1 ;
/*5dc0*/ ISETP.GE.U32.AND.EX P0, PT, R45, R62, PT, P0 ;
/*5dd0*/ IADD3 R43, P1, R43, R4, RZ ;
/*5de0*/ IMAD.X R44, R44, 0x1, R47, P1 ;
/*5df0*/ @!P0 BRA `(.L_x_697) ;
.L_x_693:
/*5e00*/ BSYNC B0 ;
.L_x_692:
/*5e10*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*5e20*/ MOV R37, 0x1 ;
/*5e30*/ @P2 BRA `(.L_x_698) ;
/*5e40*/ LDS R37, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*5e50*/ ISETP.NE.AND P1, PT, R37, RZ, PT ;
/*5e60*/ @!P1 LDS R0, [`(IsSPMDMode)] ;
/*5e70*/ ISETP.NE.AND P2, PT, R0, RZ, !P1 ;
/*5e80*/ @!P1 IMAD.MOV.U32 R0, RZ, RZ, c[0x0][0x0] ;
/*5e90*/ @!P1 IADD3 R22, R0, -0x20, RZ ;
/*5ea0*/ @!P1 IMAD.MOV.U32 R0, RZ, RZ, R22 ;
/*5eb0*/ @P2 IADD3 R0, RZ, c[0x0][0x0], RZ ;
/*5ec0*/ @!P1 IMAD.MOV.U32 R37, RZ, RZ, R0 ;
.L_x_698:
/*5ed0*/ ISETP.NE.AND P1, PT, R37, 0x1, PT ;
/*5ee0*/ BSSY B8, `(.L_x_699) ;
/*5ef0*/ @!P1 BRA `(.L_x_700) ;
/*5f00*/ IADD3 R36, R37.reuse, 0x1f, RZ ;
/*5f10*/ BSSY B6, `(.L_x_701) ;
/*5f20*/ LOP3.LUT P0, R18, R37, 0x1f, RZ, 0xc0, !PT ;
/*5f30*/ SHF.R.U32.HI R34, RZ, 0x5, R36 ;
/*5f40*/ IADD3 R3, R34, -0x1, RZ ;
/*5f50*/ ISETP.GE.U32.AND P1, PT, R26, R3, PT ;
/*5f60*/ @P0 BRA P1, `(.L_x_702) ;
/*5f70*/ BRA.DIV ~URZ, `(.L_x_703) ;
/*5f80*/ SHFL.DOWN PT, R5, R17, 0x10, 0x1f ;
/*5f90*/ SHFL.DOWN PT, R4, R16, 0x10, 0x1f ;
/*5fa0*/ DADD R6, R16, R4 ;
/*5fb0*/ SHFL.DOWN PT, R5, R7, 0x8, 0x1f ;
/*5fc0*/ SHFL.DOWN PT, R4, R6, 0x8, 0x1f ;
/*5fd0*/ DADD R8, R6, R4 ;
/*5fe0*/ SHFL.DOWN PT, R5, R9, 0x4, 0x1f ;
/*5ff0*/ SHFL.DOWN PT, R4, R8, 0x4, 0x1f ;
/*6000*/ DADD R10, R8, R4 ;
/*6010*/ SHFL.DOWN PT, R5, R11, 0x2, 0x1f ;
/*6020*/ SHFL.DOWN PT, R4, R10, 0x2, 0x1f ;
/*6030*/ DADD R18, R10, R4 ;
/*6040*/ SHFL.DOWN PT, R17, R19, 0x1, 0x1f ;
/*6050*/ SHFL.DOWN PT, R4, R18, 0x1, 0x1f ;
.L_x_772:
/*6060*/ MOV R5, R17 ;
/*6070*/ DADD R16, R4, R18 ;
/*6080*/ BRA `(.L_x_704) ;
.L_x_702:
/*6090*/ ISETP.GE.U32.AND P0, PT, R18, 0x2, PT ;
/*60a0*/ @!P0 BRA `(.L_x_704) ;
/*60b0*/ SHF.R.U32.HI R19, RZ, 0x1, R18 ;
.L_x_706:
/*60c0*/ BRA.DIV ~URZ, `(.L_x_705) ;
/*60d0*/ SHFL.DOWN PT, R23, R17, R19, 0x1f ;
/*60e0*/ SHFL.DOWN PT, R4, R16, R19, 0x1f ;
.L_x_773:
/*60f0*/ LOP3.LUT R0, R19, 0xffff, RZ, 0xc0, !PT ;
/*6100*/ IMAD.MOV.U32 R5, RZ, RZ, R23 ;
/*6110*/ IADD3 R19, R18, 0x1, RZ ;
/*6120*/ ISETP.GE.U32.AND P0, PT, R27, R0, PT ;
/*6130*/ FSEL R16, R16, RZ, !P0 ;
/*6140*/ FSEL R17, R17, -RZ, !P0 ;
/*6150*/ ISETP.GT.U32.AND P0, PT, R18, 0x2, PT ;
/*6160*/ SHF.R.U32.HI R18, RZ, 0x1, R19.reuse ;
/*6170*/ DADD R16, R4, R16 ;
/*6180*/ SHF.R.U32.HI R19, RZ, 0x2, R19 ;
/*6190*/ @P0 BRA `(.L_x_706) ;
.L_x_704:
/*61a0*/ BSYNC B6 ;
.L_x_701:
/*61b0*/ ISETP.GE.U32.AND P1, PT, R37, 0x21, PT ;
/*61c0*/ BSSY B7, `(.L_x_707) ;
/*61d0*/ ISETP.NE.AND P0, PT, R50, RZ, PT ;
/*61e0*/ ISETP.EQ.AND P0, PT, R33, R28, !P0 ;
/*61f0*/ SEL R18, R33, RZ, !P0 ;
/*6200*/ @!P1 BRA `(.L_x_708) ;
/*6210*/ LDS R0, [`(IsSPMDMode)] ;
/*6220*/ ISETP.NE.AND P2, PT, R33, R28, PT ;
/*6230*/ BSSY B6, `(.L_x_709) ;
/*6240*/ ISETP.NE.AND P1, PT, R0, RZ, PT ;
/*6250*/ P2R R0, PR, RZ, 0x4 ;
/*6260*/ @P1 BRA `(.L_x_710) ;
/*6270*/ @!P0 BRA `(.L_x_711) ;
/*6280*/ @!PT LDS RZ, [RZ] ;
/*6290*/ @!PT LDS RZ, [RZ] ;
/*62a0*/ @!PT LDS RZ, [RZ] ;
/*62b0*/ @!PT LDS RZ, [RZ] ;
/*62c0*/ MEMBAR.SC.GPU ;
/*62d0*/ ERRBAR;
/*62e0*/ CCTL.IVALL ;
/*62f0*/ BRA `(.L_x_712) ;
.L_x_711:
/*6300*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*6310*/ ISETP.NE.AND P0, PT, R2, 0x1, PT ;
/*6320*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*6330*/ SEL R5, R22, R3, !P1 ;
/*6340*/ SEL R5, R5, 0x1, !P0 ;
/*6350*/ BRA.DIV ~URZ, `(.L_x_713) ;
/*6360*/ BAR.SYNC 0x7, R5 ;
/*6370*/ BRA `(.L_x_712) ;
.L_x_710:
/*6380*/ WARPSYNC 0xffffffff ;
/*6390*/ BAR.SYNC 0x0 ;
.L_x_712:
/*63a0*/ BSYNC B6 ;
.L_x_709:
/*63b0*/ ISETP.NE.AND P0, PT, R35, RZ, PT ;
/*63c0*/ BSSY B6, `(.L_x_714) ;
/*63d0*/ @!P0 STS [R24+`(__openmp_nvptx_data_transfer_temporary_storage)], R16 ;
/*63e0*/ LDS R0, [`(IsSPMDMode)] ;
/*63f0*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*6400*/ @P0 BRA `(.L_x_715) ;
/*6410*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*6420*/ ISETP.NE.AND P1, PT, R33, R28, PT ;
/*6430*/ ISETP.NE.OR P0, PT, R0, RZ, P1 ;
/*6440*/ @P0 BRA `(.L_x_716) ;
/*6450*/ @!PT LDS RZ, [RZ] ;
/*6460*/ @!PT LDS RZ, [RZ] ;
/*6470*/ @!PT LDS RZ, [RZ] ;
/*6480*/ @!PT LDS RZ, [RZ] ;
/*6490*/ MEMBAR.SC.GPU ;
/*64a0*/ ERRBAR;
/*64b0*/ CCTL.IVALL ;
/*64c0*/ BRA `(.L_x_717) ;
.L_x_716:
/*64d0*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*64e0*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*64f0*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*6500*/ SEL R5, R22, R3, !P1 ;
/*6510*/ SEL R5, R5, 0x1, !P0 ;
/*6520*/ BRA.DIV ~URZ, `(.L_x_718) ;
/*6530*/ BAR.SYNC 0x7, R5 ;
/*6540*/ BRA `(.L_x_717) ;
.L_x_715:
/*6550*/ WARPSYNC 0xffffffff ;
/*6560*/ BAR.SYNC 0x0 ;
.L_x_717:
/*6570*/ BSYNC B6 ;
.L_x_714:
/*6580*/ LDS R0, [`(IsSPMDMode)] ;
/*6590*/ ISETP.GE.U32.AND P1, PT, R33, R34, PT ;
/*65a0*/ P2R R2, PR, RZ, 0x2 ;
/*65b0*/ @!P1 LDS R3, [R25+`(__openmp_nvptx_data_transfer_temporary_storage)] ;
/*65c0*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*65d0*/ @P0 WARPSYNC 0xffffffff ;
/*65e0*/ @!P1 IMAD.MOV.U32 R16, RZ, RZ, R3 ;
/*65f0*/ @P0 BAR.SYNC 0x0 ;
/*6600*/ @P0 BRA `(.L_x_719) ;
/*6610*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*6620*/ ISETP.NE.AND P1, PT, R33, R28, PT ;
/*6630*/ BSSY B6, `(.L_x_719) ;
/*6640*/ ISETP.EQ.AND P0, PT, R0, RZ, PT ;
/*6650*/ @!P1 BRA P0, `(.L_x_720) ;
/*6660*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*6670*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*6680*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*6690*/ SEL R5, R22, R3, !P1 ;
/*66a0*/ SEL R5, R5, 0x1, !P0 ;
/*66b0*/ BRA.DIV ~URZ, `(.L_x_721) ;
/*66c0*/ BAR.SYNC 0x7, R5 ;
/*66d0*/ BRA `(.L_x_722) ;
.L_x_720:
/*66e0*/ @!PT LDS RZ, [RZ] ;
/*66f0*/ @!PT LDS RZ, [RZ] ;
/*6700*/ @!PT LDS RZ, [RZ] ;
/*6710*/ @!PT LDS RZ, [RZ] ;
/*6720*/ MEMBAR.SC.GPU ;
/*6730*/ ERRBAR;
/*6740*/ CCTL.IVALL ;
.L_x_722:
/*6750*/ BSYNC B6 ;
.L_x_719:
/*6760*/ ISETP.NE.AND P0, PT, R35, RZ, PT ;
/*6770*/ @!P0 STS [R24+`(__openmp_nvptx_data_transfer_temporary_storage)], R17 ;
/*6780*/ LDS R0, [`(IsSPMDMode)] ;
/*6790*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*67a0*/ @P0 WARPSYNC 0xffffffff ;
/*67b0*/ @P0 BAR.SYNC 0x0 ;
/*67c0*/ @P0 BRA `(.L_x_723) ;
/*67d0*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*67e0*/ ISETP.NE.AND P1, PT, R33, R28, PT ;
/*67f0*/ BSSY B6, `(.L_x_723) ;
/*6800*/ ISETP.EQ.AND P0, PT, R0, RZ, PT ;
/*6810*/ @!P1 BRA P0, `(.L_x_724) ;
/*6820*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*6830*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*6840*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*6850*/ SEL R5, R22, R3, !P1 ;
/*6860*/ SEL R5, R5, 0x1, !P0 ;
/*6870*/ BRA.DIV ~URZ, `(.L_x_725) ;
/*6880*/ BAR.SYNC 0x7, R5 ;
/*6890*/ BRA `(.L_x_726) ;
.L_x_724:
/*68a0*/ @!PT LDS RZ, [RZ] ;
/*68b0*/ @!PT LDS RZ, [RZ] ;
/*68c0*/ @!PT LDS RZ, [RZ] ;
/*68d0*/ @!PT LDS RZ, [RZ] ;
/*68e0*/ MEMBAR.SC.GPU ;
/*68f0*/ ERRBAR;
/*6900*/ CCTL.IVALL ;
.L_x_726:
/*6910*/ BSYNC B6 ;
.L_x_723:
/*6920*/ ISETP.NE.AND P1, PT, R2, RZ, PT ;
/*6930*/ ISETP.GE.U32.AND P0, PT, R36, 0x40, PT ;
/*6940*/ ISETP.GT.U32.OR P0, PT, R33, 0x1f, !P0 ;
/*6950*/ @!P1 LDS R3, [R25+`(__openmp_nvptx_data_transfer_temporary_storage)] ;
/*6960*/ @!P1 IMAD.MOV.U32 R17, RZ, RZ, R3 ;
/*6970*/ @P0 BRA `(.L_x_708) ;
/*6980*/ SHF.R.U32.HI R36, RZ, 0x6, R36 ;
.L_x_728:
/*6990*/ PRMT R23, R36, 0x9910, RZ ;
/*69a0*/ BRA.DIV ~URZ, `(.L_x_727) ;
/*69b0*/ SHFL.DOWN PT, R19, R17, R23, 0x1f ;
/*69c0*/ SHFL.DOWN PT, R4, R16, R23, 0x1f ;
.L_x_774:
/*69d0*/ LOP3.LUT R36, R36, 0xffff, RZ, 0xc0, !PT ;
/*69e0*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
/*69f0*/ LOP3.LUT R3, R18, 0xffff, RZ, 0xc0, !PT ;
/*6a00*/ ISETP.GE.U32.AND P0, PT, R3, R36, PT ;
/*6a10*/ IADD3 R36, R34, 0x1, RZ ;
/*6a20*/ FSEL R16, R16, RZ, !P0 ;
/*6a30*/ FSEL R17, R17, -RZ, !P0 ;
/*6a40*/ ISETP.GT.U32.AND P0, PT, R34, 0x2, PT ;
/*6a50*/ SHF.R.U32.HI R34, RZ, 0x1, R36 ;
/*6a60*/ DADD R16, R4, R16 ;
/*6a70*/ SHF.R.U32.HI R36, RZ, 0x2, R36 ;
/*6a80*/ @P0 BRA `(.L_x_728) ;
.L_x_708:
/*6a90*/ BSYNC B7 ;
.L_x_707:
/*6aa0*/ ISETP.NE.AND P0, PT, R18, RZ, PT ;
.L_x_700:
/*6ab0*/ BSYNC B8 ;
.L_x_699:
/*6ac0*/ @!P0 LDS.64 R2, [`($__c_ratio_shared8_$_0__2711)] ;
/*6ad0*/ @!P0 DADD R2, R2, R16 ;
/*6ae0*/ @!P0 STS.64 [`($__c_ratio_shared8_$_0__2711)], R2 ;
.L_x_690:
/*6af0*/ BSYNC B9 ;
.L_x_689:
/*6b00*/ ISETP.NE.AND P0, PT, R33, RZ, PT ;
/*6b10*/ WARPSYNC 0xffffffff ;
/*6b20*/ BAR.SYNC 0x0 ;
/*6b30*/ @!P0 MOV R0, 0x1 ;
/*6b40*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ;
/*6b50*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_1)], RZ ;
/*6b60*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_2)], R0 ;
/*6b70*/ BAR.SYNC 0x0 ;
/*6b80*/ BAR.SYNC 0x0 ;
/*6b90*/ BRA `(.L_x_729) ;
.L_x_688:
/*6ba0*/ ISETP.GT.U32.AND P0, PT, R2, 0x1, PT ;
/*6bb0*/ @P0 BRA `(.L_x_730) ;
/*6bc0*/ IMAD.MOV.U32 R0, RZ, RZ, 0x1 ;
/*6bd0*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], R0 ;
/*6be0*/ LDS.64 R10, [`($__psiMinv_cols6_shared_$_0__2705)] ;
/*6bf0*/ ISETP.NE.U32.AND P0, PT, R10, RZ, PT ;
/*6c00*/ ISETP.NE.AND.EX P0, PT, R11, RZ, PT, P0 ;
/*6c10*/ @!P0 BRA `(.L_x_731) ;
/*6c20*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_1)] ;
/*6c30*/ BSSY B0, `(.L_x_732) ;
/*6c40*/ CS2R R16, SRZ ;
/*6c50*/ LDS R5, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*6c60*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*6c70*/ IADD3 R0, P1, R10, -0x1, RZ ;
/*6c80*/ SEL R13, R33, RZ, !P0 ;
/*6c90*/ IADD3.X R3, R11, -0x1, RZ, P1, !PT ;
/*6ca0*/ ISETP.GE.U32.AND P0, PT, R0, R13, PT ;
/*6cb0*/ SHF.R.S32.HI R14, RZ, 0x1f, R13 ;
/*6cc0*/ ISETP.NE.AND P1, PT, R5, RZ, PT ;
/*6cd0*/ ISETP.GE.U32.AND.EX P0, PT, R3, R14, PT, P0 ;
/*6ce0*/ SEL R2, R22, R5, !P1 ;
/*6cf0*/ @!P0 BRA `(.L_x_733) ;
/*6d00*/ LDS.64 R6, [`($__iw_shared5_$_0__2697)] ;
/*6d10*/ ULDC.64 UR4, c[0x0][0x118] ;
/*6d20*/ LDS.64 R4, [`($__psiMinv_temp_list_devptr5_shared_$_0__2703)] ;
/*6d30*/ LDS.64 R8, [`($__psiV_temp_list_ptr3_shared4_$_0__2695)] ;
/*6d40*/ LDS R12, [`($__WorkingIndex7_shared7_$_0__2707)] ;
/*6d50*/ IMAD.SHL.U32 R15, R6.reuse, 0x8, RZ ;
/*6d60*/ SHF.L.U64.HI R7, R6, 0x3, R7 ;
/*6d70*/ IADD3 R4, P0, R4, R15.reuse, RZ ;
/*6d80*/ IADD3 R6, P1, R8, R15, RZ ;
/*6d90*/ IMAD.X R5, R5, 0x1, R7, P0 ;
/*6da0*/ IADD3.X R7, R9, R7, RZ, P1, !PT ;
/*6db0*/ LD.E.64 R4, [R4.64] ;
/*6dc0*/ LD.E.64 R6, [R6.64] ;
/*6dd0*/ IMAD.MOV.U32 R9, RZ, RZ, R13 ;
/*6de0*/ SHF.L.U64.HI R8, R10, 0x3, R11 ;
/*6df0*/ IMAD.WIDE R12, R12, 0x8, RZ ;
/*6e00*/ MOV R11, R14 ;
/*6e10*/ SHF.R.S32.HI R19, RZ, 0x1f, R2 ;
/*6e20*/ IMAD.SHL.U32 R10, R10, 0x8, RZ ;
/*6e30*/ IMAD R14, R8, R9.reuse, RZ ;
/*6e40*/ SHF.L.U64.HI R18, R2, 0x3, R19 ;
/*6e50*/ IMAD.WIDE.U32 R16, R10, R9, R12 ;
/*6e60*/ IMAD R21, R10, R11.reuse, R14 ;
/*6e70*/ MOV R14, R11 ;
/*6e80*/ IMAD.MOV.U32 R41, RZ, RZ, R9 ;
/*6e90*/ IADD3 R13, P1, R4, R16, RZ ;
/*6ea0*/ LEA R15, P0, R9, R6, 0x3 ;
/*6eb0*/ IMAD R6, R8, R2.reuse, RZ ;
/*6ec0*/ IADD3.X R12, R5, R17, R21, P1, !PT ;
/*6ed0*/ IMAD.WIDE.U32 R4, R10, R2, RZ ;
/*6ee0*/ LEA.HI.X R39, R9.reuse, R7, R11, 0x3, P0 ;
/*6ef0*/ CS2R R16, SRZ ;
/*6f00*/ IADD3 R34, P0, R9, 0x1, RZ ;
/*6f10*/ IMAD R37, R19, R10, R6 ;
/*6f20*/ IMAD.X R59, RZ, RZ, R11, P0 ;
/*6f30*/ IADD3 R37, R5, R37, RZ ;
.L_x_737:
/*6f40*/ ISETP.GE.U32.AND P0, PT, R41, R34, PT ;
/*6f50*/ IMAD.MOV.U32 R55, RZ, RZ, R13 ;
/*6f60*/ ULDC.64 UR4, c[0x0][0x118] ;
/*6f70*/ IMAD.MOV.U32 R47, RZ, RZ, R12 ;
/*6f80*/ ISETP.GE.U32.AND.EX P0, PT, R14, R59, PT, P0 ;
/*6f90*/ IMAD.MOV.U32 R40, RZ, RZ, R15 ;
/*6fa0*/ IMAD.MOV.U32 R49, RZ, RZ, R39 ;
/*6fb0*/ @P0 MOV R6, R55 ;
/*6fc0*/ @P0 IMAD.MOV.U32 R7, RZ, RZ, R47 ;
/*6fd0*/ @P0 IMAD.MOV.U32 R20, RZ, RZ, R40 ;
/*6fe0*/ @P0 IMAD.MOV.U32 R21, RZ, RZ, R49 ;
/*6ff0*/ @P0 LD.E.64 R6, [R6.64] ;
/*7000*/ @P0 LD.E.64 R20, [R20.64] ;
/*7010*/ MOV R61, R41 ;
/*7020*/ IMAD.MOV.U32 R38, RZ, RZ, R14 ;
/*7030*/ BSSY B1, `(.L_x_734) ;
/*7040*/ @P0 IADD3 R61, P1, R61, 0x1, RZ ;
/*7050*/ IADD3 R36, P3, R34.reuse, -R61.reuse, RZ ;
/*7060*/ @P0 IMAD.X R38, RZ, RZ, R38, P1 ;
/*7070*/ ISETP.GT.U32.AND P2, PT, R34, R61, PT ;
/*7080*/ ISETP.LE.U32.AND P1, PT, R36, 0x3, PT ;
/*7090*/ IMAD.X R36, R59.reuse, 0x1, ~R38, P3 ;
/*70a0*/ ISETP.GT.U32.AND.EX P2, PT, R59, R38, PT, P2 ;
/*70b0*/ @P0 IADD3 R40, P3, R40, 0x8, RZ ;
/*70c0*/ ISETP.LE.U32.OR.EX P2, PT, R36, RZ, !P2, P1 ;
/*70d0*/ @P0 IADD3.X R49, RZ, R49, RZ, P3, !PT ;
/*70e0*/ @P0 IADD3 R55, P3, R55, R10, RZ ;
/*70f0*/ PLOP3.LUT P1, PT, P0, PT, PT, 0x8, 0x0 ;
/*7100*/ @P0 IMAD.X R47, R47, 0x1, R8, P3 ;
/*7110*/ @P0 DFMA R16, R6, R20, R16 ;
/*7120*/ @P2 BRA `(.L_x_735) ;
/*7130*/ IADD3 R36, P0, R34, -0x3, RZ ;
/*7140*/ PLOP3.LUT P1, PT, PT, PT, PT, 0x8, 0x0 ;
/*7150*/ IADD3.X R63, R59, -0x1, RZ, P0, !PT ;
.L_x_736:
/*7160*/ IADD3 R54, P0, R55, R10.reuse, RZ ;
/*7170*/ IMAD.MOV.U32 R6, RZ, RZ, R40 ;
/*7180*/ ULDC.64 UR4, c[0x0][0x118] ;
/*7190*/ IMAD.MOV.U32 R7, RZ, RZ, R49 ;
/*71a0*/ IMAD.MOV.U32 R46, RZ, RZ, R55 ;
/*71b0*/ IMAD.X R55, R47, 0x1, R8, P0 ;
/*71c0*/ LD.E.64 R52, [R6.64] ;
/*71d0*/ IADD3 R56, P0, R54, R10, RZ ;
/*71e0*/ LD.E.64 R50, [R46.64] ;
/*71f0*/ IADD3.X R57, R55, R8, RZ, P0, !PT ;
/*7200*/ IADD3 R20, P0, R56, R10, RZ ;
/*7210*/ LD.E.64 R42, [R6.64+0x8] ;
/*7220*/ LD.E.64 R54, [R54.64] ;
/*7230*/ IMAD.X R21, R57, 0x1, R8, P0 ;
/*7240*/ LD.E.64 R44, [R6.64+0x10] ;
/*7250*/ LD.E.64 R56, [R56.64] ;
/*7260*/ LD.E.64 R46, [R6.64+0x18] ;
/*7270*/ LD.E.64 R48, [R20.64] ;
/*7280*/ IADD3 R61, P0, R61, 0x4, RZ ;
/*7290*/ IMAD.X R38, RZ, RZ, R38, P0 ;
/*72a0*/ ISETP.GE.U32.AND P0, PT, R61, R36, PT ;
/*72b0*/ ISETP.GE.U32.AND.EX P0, PT, R38, R63, PT, P0 ;
/*72c0*/ IADD3 R40, P2, R6, 0x20, RZ ;
/*72d0*/ DFMA R50, R50, R52, R16 ;
/*72e0*/ DFMA R42, R54, R42, R50 ;
/*72f0*/ IADD3 R55, P3, R20, R10, RZ ;
/*7300*/ DFMA R42, R56, R44, R42 ;
/*7310*/ DFMA R16, R48, R46, R42 ;
/*7320*/ IADD3.X R49, RZ, R7, RZ, P2, !PT ;
/*7330*/ IMAD.X R47, R21, 0x1, R8, P3 ;
/*7340*/ @!P0 BRA `(.L_x_736) ;
.L_x_735:
/*7350*/ BSYNC B1 ;
.L_x_734:
/*7360*/ IADD3 R6, P3, R34.reuse, -R61.reuse, RZ ;
/*7370*/ ULDC.64 UR4, c[0x0][0x118] ;
/*7380*/ ISETP.GT.U32.AND P2, PT, R34, R61, PT ;
/*7390*/ ISETP.LE.U32.AND P0, PT, R6, 0x1, PT ;
/*73a0*/ IMAD.X R6, R59.reuse, 0x1, ~R38, P3 ;
/*73b0*/ ISETP.GT.U32.AND.EX P2, PT, R59, R38, PT, P2 ;
/*73c0*/ ISETP.LE.U32.OR.EX P0, PT, R6, RZ, !P2, P0 ;
/*73d0*/ @!P0 IMAD.MOV.U32 R6, RZ, RZ, R55 ;
/*73e0*/ @!P0 IADD3 R50, P2, R55, R10, RZ ;
/*73f0*/ @!P0 IMAD.MOV.U32 R7, RZ, RZ, R47 ;
/*7400*/ @!P0 PLOP3.LUT P1, PT, PT, PT, PT, 0x8, 0x0 ;
/*7410*/ @!P0 IMAD.MOV.U32 R52, RZ, RZ, R40 ;
/*7420*/ @!P0 IADD3 R61, P3, R61, 0x2, RZ ;
/*7430*/ @!P0 IMAD.MOV.U32 R53, RZ, RZ, R49 ;
/*7440*/ @!P0 LD.E.64 R6, [R6.64] ;
/*7450*/ @!P0 IMAD.X R51, R47, 0x1, R8, P2 ;
/*7460*/ ISETP.LT.U32.AND P2, PT, R61, R34, PT ;
/*7470*/ @!P0 LD.E.64 R20, [R52.64] ;
/*7480*/ @!P0 IMAD.X R38, RZ, RZ, R38, P3 ;
/*7490*/ @!P0 LD.E.64 R42, [R52.64+0x8] ;
/*74a0*/ @!P0 LD.E.64 R44, [R50.64] ;
/*74b0*/ ISETP.LT.U32.OR.EX P1, PT, R38, R59, P1, P2 ;
/*74c0*/ @!P0 IADD3 R55, P2, R50, R10, RZ ;
/*74d0*/ @!P0 IMAD.X R47, R51, 0x1, R8, P2 ;
/*74e0*/ @!P0 IADD3 R40, P2, R40, 0x10, RZ ;
/*74f0*/ IMAD.MOV.U32 R46, RZ, RZ, R55 ;
/*7500*/ @!P0 IMAD.X R49, RZ, RZ, R49, P2 ;
/*7510*/ IMAD.MOV.U32 R48, RZ, RZ, R40 ;
/*7520*/ @P1 LD.E.64 R46, [R46.64] ;
/*7530*/ @P1 LD.E.64 R48, [R48.64] ;
/*7540*/ IADD3 R9, P2, R2, R9, RZ ;
/*7550*/ IMAD.X R36, R19, 0x1, R11, P2 ;
/*7560*/ ISETP.LT.U32.AND P2, PT, R9, R0, PT ;
/*7570*/ ISETP.LT.U32.AND.EX P2, PT, R36, R3, PT, P2 ;
/*7580*/ SEL R9, R9, R0, P2 ;
/*7590*/ SEL R11, R36, R3, P2 ;
/*75a0*/ IADD3 R34, P3, R9, 0x1, RZ ;
/*75b0*/ IMAD.X R59, RZ, RZ, R11, P3 ;
/*75c0*/ LEA R15, P3, R2, R15, 0x3 ;
/*75d0*/ IADD3 R13, P2, R13, R4, RZ ;
/*75e0*/ IMAD.X R39, R39, 0x1, R18, P3 ;
/*75f0*/ IMAD.X R12, R12, 0x1, R37, P2 ;
/*7600*/ @!P0 DFMA R6, R6, R20, R16 ;
/*7610*/ @!P0 DFMA R16, R44, R42, R6 ;
/*7620*/ IADD3 R41, P0, R2, R41, RZ ;
/*7630*/ IMAD.X R14, R19, 0x1, R14, P0 ;
/*7640*/ ISETP.GE.U32.AND P0, PT, R41, R34, PT ;
/*7650*/ ISETP.GE.U32.AND.EX P0, PT, R14, R59, PT, P0 ;
/*7660*/ @P1 DFMA R16, R46, R48, R16 ;
/*7670*/ @!P0 BRA `(.L_x_737) ;
.L_x_733:
/*7680*/ BSYNC B0 ;
.L_x_732:
/*7690*/ ISETP.NE.AND P1, PT, R2, 0x1, PT ;
/*76a0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*76b0*/ @!P1 BRA `(.L_x_738) ;
/*76c0*/ IADD3 R36, R2.reuse, 0x1f, RZ ;
/*76d0*/ BSSY B6, `(.L_x_739) ;
/*76e0*/ LOP3.LUT P0, R18, R2, 0x1f, RZ, 0xc0, !PT ;
/*76f0*/ SHF.R.U32.HI R34, RZ, 0x5, R36 ;
/*7700*/ IADD3 R3, R34, -0x1, RZ ;
/*7710*/ ISETP.GE.U32.AND P1, PT, R26, R3, PT ;
/*7720*/ @P0 BRA P1, `(.L_x_740) ;
/*7730*/ BRA.DIV ~URZ, `(.L_x_741) ;
/*7740*/ SHFL.DOWN PT, R5, R17, 0x10, 0x1f ;
/*7750*/ SHFL.DOWN PT, R4, R16, 0x10, 0x1f ;
/*7760*/ DADD R6, R16, R4 ;
/*7770*/ SHFL.DOWN PT, R5, R7, 0x8, 0x1f ;
/*7780*/ SHFL.DOWN PT, R4, R6, 0x8, 0x1f ;
/*7790*/ DADD R8, R6, R4 ;
/*77a0*/ SHFL.DOWN PT, R5, R9, 0x4, 0x1f ;
/*77b0*/ SHFL.DOWN PT, R4, R8, 0x4, 0x1f ;
/*77c0*/ DADD R10, R8, R4 ;
/*77d0*/ SHFL.DOWN PT, R5, R11, 0x2, 0x1f ;
/*77e0*/ SHFL.DOWN PT, R4, R10, 0x2, 0x1f ;
/*77f0*/ DADD R18, R10, R4 ;
/*7800*/ SHFL.DOWN PT, R17, R19, 0x1, 0x1f ;
/*7810*/ SHFL.DOWN PT, R4, R18, 0x1, 0x1f ;
.L_x_775:
/*7820*/ MOV R5, R17 ;
/*7830*/ DADD R16, R4, R18 ;
/*7840*/ BRA `(.L_x_742) ;
.L_x_740:
/*7850*/ ISETP.GE.U32.AND P0, PT, R18, 0x2, PT ;
/*7860*/ @!P0 BRA `(.L_x_742) ;
/*7870*/ SHF.R.U32.HI R19, RZ, 0x1, R18 ;
.L_x_744:
/*7880*/ BRA.DIV ~URZ, `(.L_x_743) ;
/*7890*/ SHFL.DOWN PT, R37, R17, R19, 0x1f ;
/*78a0*/ SHFL.DOWN PT, R4, R16, R19, 0x1f ;
.L_x_776:
/*78b0*/ LOP3.LUT R0, R19, 0xffff, RZ, 0xc0, !PT ;
/*78c0*/ IMAD.MOV.U32 R5, RZ, RZ, R37 ;
/*78d0*/ IADD3 R19, R18, 0x1, RZ ;
/*78e0*/ ISETP.GE.U32.AND P0, PT, R27, R0, PT ;
/*78f0*/ FSEL R16, R16, RZ, !P0 ;
/*7900*/ FSEL R17, R17, -RZ, !P0 ;
/*7910*/ ISETP.GT.U32.AND P0, PT, R18, 0x2, PT ;
/*7920*/ SHF.R.U32.HI R18, RZ, 0x1, R19.reuse ;
/*7930*/ DADD R16, R4, R16 ;
/*7940*/ SHF.R.U32.HI R19, RZ, 0x2, R19 ;
/*7950*/ @P0 BRA `(.L_x_744) ;
.L_x_742:
/*7960*/ BSYNC B6 ;
// Cross-warp stage, step 1: publish the LOW word (R16) of the partial sum
// through shared memory and synchronise.
// The whole stage is skipped (branch to .L_x_745) unless R2 >= 0x21.
// P0 = (R23 != 0) is computed here because the .L_x_745 path falls through to
// .L_x_738, whose instructions are predicated on P0.
// NOTE(review): R2 is set outside this excerpt - presumably the number of
// participating threads; confirm.
.L_x_739:
/*7970*/ ISETP.GE.U32.AND P1, PT, R2, 0x21, PT ;
/*7980*/ BSSY B7, `(.L_x_738) ;
/*7990*/ ISETP.NE.AND P0, PT, R23, RZ, PT ;
/*79a0*/ @!P1 BRA `(.L_x_745) ;
// Target .L_x_746 is outside this excerpt.
/*79b0*/ BRA.DIV ~URZ, `(.L_x_746) ;
// Barrier resource 7 with R2 as the thread-count operand.
/*79c0*/ BAR.SYNC 0x7, R2 ;
// NOTE(review): .L_x_777 is not referenced in this excerpt; presumably the
// rejoin point for the .L_x_746 path.
.L_x_777:
/*79d0*/ ISETP.NE.AND P0, PT, R35, RZ, PT ;
/*79e0*/ BSSY B6, `(.L_x_747) ;
// Threads with R35 == 0 store R16 at byte offset R24 of the transfer buffer.
/*79f0*/ @!P0 STS [R24+`(__openmp_nvptx_data_transfer_temporary_storage)], R16 ;
// Synchronisation is chosen from runtime state read out of shared memory:
//   IsSPMDMode != 0                        -> .L_x_748 (WARPSYNC + BAR.SYNC 0)
//   TeamState_$_0 != 0 or R33 != R28       -> .L_x_749 (BAR.SYNC 7, count R5)
//   otherwise                              -> fence sequence just below
/*7a00*/ LDS R0, [`(IsSPMDMode)] ;
/*7a10*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*7a20*/ @P0 BRA `(.L_x_748) ;
/*7a30*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*7a40*/ ISETP.NE.AND P1, PT, R33, R28, PT ;
/*7a50*/ ISETP.NE.OR P0, PT, R0, RZ, P1 ;
/*7a60*/ @P0 BRA `(.L_x_749) ;
// The four loads below are predicated on !PT and therefore never execute;
// the effective work is the MEMBAR.SC.GPU / ERRBAR / CCTL.IVALL sequence.
/*7a70*/ @!PT LDS RZ, [RZ] ;
/*7a80*/ @!PT LDS RZ, [RZ] ;
/*7a90*/ @!PT LDS RZ, [RZ] ;
/*7aa0*/ @!PT LDS RZ, [RZ] ;
/*7ab0*/ MEMBAR.SC.GPU ;
/*7ac0*/ ERRBAR;
/*7ad0*/ CCTL.IVALL ;
/*7ae0*/ BRA `(.L_x_750) ;
.L_x_749:
// Barrier count R5: 1 unless TeamState_$_0 == 1; in that case TeamState_$_2,
// or R22 when TeamState_$_2 is zero.
/*7af0*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*7b00*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*7b10*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*7b20*/ SEL R5, R22, R3, !P1 ;
/*7b30*/ SEL R5, R5, 0x1, !P0 ;
// Target .L_x_751 is outside this excerpt.
/*7b40*/ BRA.DIV ~URZ, `(.L_x_751) ;
/*7b50*/ BAR.SYNC 0x7, R5 ;
/*7b60*/ BRA `(.L_x_750) ;
.L_x_748:
/*7b70*/ WARPSYNC 0xffffffff ;
/*7b80*/ BAR.SYNC 0x0 ;
.L_x_750:
/*7b90*/ BSYNC B6 ;
// Cross-warp stage, step 2: threads with R33 < R34 read a low word back from
// the transfer buffer (byte offset R25) into R16; then all threads synchronise
// again before the buffer is reused for the high word at .L_x_752.
.L_x_747:
/*7ba0*/ LDS R0, [`(IsSPMDMode)] ;
/*7bb0*/ ISETP.GE.U32.AND P1, PT, R33, R34, PT ;
// Save P1 in R2 (mask 0x2 selects predicate bit 1); R2 is tested again at
// .L_x_756 to decide whether the high word is read back too.
/*7bc0*/ P2R R2, PR, RZ, 0x2 ;
/*7bd0*/ @!P1 LDS R3, [R25+`(__openmp_nvptx_data_transfer_temporary_storage)] ;
/*7be0*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
// IsSPMDMode != 0: WARPSYNC + BAR.SYNC 0, then jump straight to .L_x_752.
/*7bf0*/ @P0 WARPSYNC 0xffffffff ;
/*7c00*/ @!P1 IMAD.MOV.U32 R16, RZ, RZ, R3 ;
/*7c10*/ @P0 BAR.SYNC 0x0 ;
/*7c20*/ @P0 BRA `(.L_x_752) ;
// IsSPMDMode == 0: fence-only path (.L_x_753) when TeamState_$_0 == 0 and
// R33 == R28, otherwise BAR.SYNC 7 with the count computed into R5.
/*7c30*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*7c40*/ ISETP.EQ.AND P1, PT, R33, R28, PT ;
/*7c50*/ BSSY B6, `(.L_x_752) ;
/*7c60*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*7c70*/ @!P0 BRA P1, `(.L_x_753) ;
// R5 = 1 unless TeamState_$_0 == 1; in that case TeamState_$_2, or R22 when
// TeamState_$_2 is zero.
/*7c80*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*7c90*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*7ca0*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*7cb0*/ SEL R5, R22, R3, !P1 ;
/*7cc0*/ SEL R5, R5, 0x1, !P0 ;
// Target .L_x_754 is outside this excerpt.
/*7cd0*/ BRA.DIV ~URZ, `(.L_x_754) ;
/*7ce0*/ BAR.SYNC 0x7, R5 ;
/*7cf0*/ BRA `(.L_x_755) ;
.L_x_753:
// The !PT-predicated loads never execute; the fence sequence follows them.
/*7d00*/ @!PT LDS RZ, [RZ] ;
/*7d10*/ @!PT LDS RZ, [RZ] ;
/*7d20*/ @!PT LDS RZ, [RZ] ;
/*7d30*/ @!PT LDS RZ, [RZ] ;
/*7d40*/ MEMBAR.SC.GPU ;
/*7d50*/ ERRBAR;
/*7d60*/ CCTL.IVALL ;
.L_x_755:
/*7d70*/ BSYNC B6 ;
// Cross-warp stage, step 3: same publish-and-synchronise sequence as for the
// low word, now for the HIGH word (R17) of the partial sum. Threads with
// R35 == 0 store R17 at byte offset R24 of the transfer buffer.
.L_x_752:
/*7d80*/ ISETP.NE.AND P0, PT, R35, RZ, PT ;
/*7d90*/ @!P0 STS [R24+`(__openmp_nvptx_data_transfer_temporary_storage)], R17 ;
// IsSPMDMode != 0: WARPSYNC + BAR.SYNC 0, then continue at .L_x_756.
/*7da0*/ LDS R0, [`(IsSPMDMode)] ;
/*7db0*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*7dc0*/ @P0 WARPSYNC 0xffffffff ;
/*7dd0*/ @P0 BAR.SYNC 0x0 ;
/*7de0*/ @P0 BRA `(.L_x_756) ;
// IsSPMDMode == 0: fence-only path (.L_x_757) when TeamState_$_0 == 0 and
// R33 == R28, otherwise BAR.SYNC 7 with the count computed into R5.
/*7df0*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*7e00*/ ISETP.EQ.AND P1, PT, R33, R28, PT ;
/*7e10*/ BSSY B6, `(.L_x_756) ;
/*7e20*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*7e30*/ @!P0 BRA P1, `(.L_x_757) ;
// R5 = 1 unless TeamState_$_0 == 1; in that case TeamState_$_2, or R22 when
// TeamState_$_2 is zero.
/*7e40*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*7e50*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*7e60*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*7e70*/ SEL R5, R22, R3, !P1 ;
/*7e80*/ SEL R5, R5, 0x1, !P0 ;
// Target .L_x_758 is outside this excerpt.
/*7e90*/ BRA.DIV ~URZ, `(.L_x_758) ;
/*7ea0*/ BAR.SYNC 0x7, R5 ;
/*7eb0*/ BRA `(.L_x_759) ;
.L_x_757:
// The !PT-predicated loads never execute; the fence sequence follows them.
/*7ec0*/ @!PT LDS RZ, [RZ] ;
/*7ed0*/ @!PT LDS RZ, [RZ] ;
/*7ee0*/ @!PT LDS RZ, [RZ] ;
/*7ef0*/ @!PT LDS RZ, [RZ] ;
/*7f00*/ MEMBAR.SC.GPU ;
/*7f10*/ ERRBAR;
/*7f20*/ CCTL.IVALL ;
.L_x_759:
/*7f30*/ BSYNC B6 ;
// Cross-warp stage, step 4: read the high word back and run a second
// variable-width shuffle reduction over the gathered values in R16:R17.
.L_x_756:
// P2 = (R2 != 0): R2 holds the result of the earlier "R33 >= R34" test that
// was saved with P2R at .L_x_747. Threads where it was false load a high word
// from byte offset R25 of the transfer buffer into R17.
/*7f40*/ ISETP.NE.AND P2, PT, R2, RZ, PT ;
/*7f50*/ ISETP.GE.U32.AND P1, PT, R36, 0x40, PT ;
// P0 = (R23 != 0) is what .L_x_738 predicates on when the loop is skipped.
/*7f60*/ ISETP.NE.AND P0, PT, R23, RZ, PT ;
// Skip the loop when R33 > 0x1f or R36 < 0x40.
/*7f70*/ ISETP.GT.U32.OR P1, PT, R33, 0x1f, !P1 ;
/*7f80*/ @!P2 LDS R3, [R25+`(__openmp_nvptx_data_transfer_temporary_storage)] ;
/*7f90*/ @!P2 IMAD.MOV.U32 R17, RZ, RZ, R3 ;
/*7fa0*/ @P1 BRA `(.L_x_745) ;
/*7fb0*/ BSSY B6, `(.L_x_760) ;
// Initial offset R36 >> 6; R34 (computed earlier as R36 >> 5) is the width.
/*7fc0*/ SHF.R.U32.HI R36, RZ, 0x6, R36 ;
.L_x_762:
// NOTE(review): PRMT selector 0x9910 looks like a sign-extension of the low
// 16 bits of R36 into R37, which is then used as the shuffle delta - confirm.
/*7fd0*/ PRMT R37, R36, 0x9910, RZ ;
// Target .L_x_761 is outside this excerpt.
/*7fe0*/ BRA.DIV ~URZ, `(.L_x_761) ;
// Shuffle R16:R17 down by R37 lanes: high word -> R19, low word -> R4.
/*7ff0*/ SHFL.DOWN PT, R19, R17, R37, 0x1f ;
/*8000*/ SHFL.DOWN PT, R4, R16, R37, 0x1f ;
// NOTE(review): .L_x_778 is not referenced in this excerpt; presumably the
// rejoin point for the .L_x_761 path.
.L_x_778:
/*8010*/ LOP3.LUT R36, R36, 0xffff, RZ, 0xc0, !PT ;
/*8020*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
/*8030*/ LOP3.LUT R3, R33, 0xffff, RZ, 0xc0, !PT ;
// P0 = (R33 & 0xffff) >= (offset & 0xffff).
/*8040*/ ISETP.GE.U32.AND P0, PT, R3, R36, PT ;
/*8050*/ IADD3 R36, R34, 0x1, RZ ;
// When P0 holds the own value is replaced by the bit pattern of -0.0, so the
// DADD below yields just the shuffled-in value; otherwise the two are summed.
/*8060*/ FSEL R16, R16, RZ, !P0 ;
/*8070*/ FSEL R17, R17, -RZ, !P0 ;
// Continue while the old width R34 > 2; the width becomes (R34 + 1) >> 1 and
// the next offset (R34 + 1) >> 2.
/*8080*/ ISETP.GT.U32.AND P0, PT, R34, 0x2, PT ;
/*8090*/ SHF.R.U32.HI R34, RZ, 0x1, R36 ;
/*80a0*/ DADD R16, R4, R16 ;
/*80b0*/ SHF.R.U32.HI R36, RZ, 0x2, R36 ;
/*80c0*/ @P0 BRA `(.L_x_762) ;
/*80d0*/ BSYNC B6 ;
.L_x_760:
// The loop clobbered P0; recompute P0 = (R23 != 0) for .L_x_738.
/*80e0*/ ISETP.NE.AND P0, PT, R23, RZ, PT ;
.L_x_745:
/*80f0*/ BSYNC B7 ;
// Fold the reduced value into the shared accumulator: threads with R23 == 0
// (P0 false) perform a 64-bit read-modify-write
//   $__c_ratio_shared8_$_0__2711 += R16:R17.
// NOTE(review): R23 is set outside this excerpt - presumably only one thread
// has R23 == 0 here, otherwise this non-atomic update would race; confirm.
.L_x_738:
/*8100*/ @!P0 LDS.64 R2, [`($__c_ratio_shared8_$_0__2711)] ;
/*8110*/ @!P0 DADD R2, R2, R16 ;
/*8120*/ @!P0 STS.64 [`($__c_ratio_shared8_$_0__2711)], R2 ;
.L_x_731:
// Clear TeamState_$_0 and continue at .L_x_729.
/*8130*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ;
/*8140*/ BRA `(.L_x_729) ;
// .L_x_730 is entered from code outside this excerpt (R3 is set there).
// It writes TeamState_$_2 = R3, TeamState_$_1 = 1, TeamState_$_0 = 1, passes
// two BAR.SYNC 8 barriers, then writes TeamState_$_0 = 0, TeamState_$_1 = 0 and
// TeamState_$_2 = 1 before falling through to .L_x_729.
// NOTE(review): presumably the main-thread side of a generic-mode parallel
// region (publish state, release workers, wait, reset) - confirm against the
// OpenMP device runtime.
.L_x_730:
/*8150*/ MOV R0, 0x1 ;
/*8160*/ STS [`(_ZN4ompx5state9TeamStateE_$_2)], R3 ;
/*8170*/ STS [`(_ZN4ompx5state9TeamStateE_$_1)], R0 ;
/*8180*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], R0 ;
/*8190*/ BAR.SYNC 0x8 ;
/*81a0*/ BAR.SYNC 0x8 ;
/*81b0*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ;
/*81c0*/ STS [`(_ZN4ompx5state9TeamStateE_$_1)], RZ ;
/*81d0*/ STS [`(_ZN4ompx5state9TeamStateE_$_2)], R0 ;
// Write-out of the reduced value. Only the thread with R33 == 0 (P1 false)
// stores; all threads take part in the barrier in between.
//   1) before the barrier: c[0x0][0x1b0][iw] = c_ratio
//   2) after the barrier:  c[0x0][0x1b8][iw] = 1.0 / c_ratio
// where iw is the 64-bit value in $__iw_shared5_$_0__2697 and both arrays are
// indexed with an 8-byte stride (LEA shift 0x3).
// NOTE(review): store (1) reads c_ratio BEFORE the WARPSYNC/BAR.SYNC below, and
// no barrier is visible between the accumulate at .L_x_738 and this load. If
// the threads with R23 == 0 and R33 == 0 can differ, this looks like the
// assignment-after-reduction ordering problem the gist describes - confirm
// against the code preceding this excerpt.
.L_x_729:
/*81e0*/ ISETP.NE.AND P1, PT, R33, RZ, PT ;
/*81f0*/ ULDC.64 UR4, c[0x0][0x118] ;
// Save P1 in R23 (mask 0x2 selects predicate bit 1).
/*8200*/ P2R R23, PR, RZ, 0x2 ;
/*8210*/ @!P1 LDS.64 R2, [`($__iw_shared5_$_0__2697)] ;
/*8220*/ @!P1 LDS.64 R4, [`($__c_ratio_shared8_$_0__2711)] ;
/*8230*/ @!P1 LEA R0, P0, R2, c[0x0][0x1b0], 0x3 ;
/*8240*/ @!P1 LEA.HI.X R3, R2, c[0x0][0x1b4], R3, 0x3, P0 ;
/*8250*/ @!P1 IMAD.MOV.U32 R2, RZ, RZ, R0 ;
/*8260*/ @!P1 STG.E.64 [R2.64], R4 ;
/*8270*/ WARPSYNC 0xffffffff ;
/*8280*/ BAR.SYNC 0x0 ;
/*8290*/ @P1 BRA `(.L_x_763) ;
// Thread R33 == 0 only: reload c_ratio (R4:R5) and iw (R10:R11).
/*82a0*/ LDS.64 R4, [`($__c_ratio_shared8_$_0__2711)] ;
/*82b0*/ LDS.64 R10, [`($__iw_shared5_$_0__2697)] ;
// Double-precision reciprocal: MUFU.RCP64H seeds the high word (R3), then two
// DFMA refinement rounds produce R2:R3. R2 = R5 + 0x300402 serves both as the
// low word of the seed pair and as the operand of the FSETP range test that
// selects the fast path (P0) or the slow-path helper.
/*82c0*/ MUFU.RCP64H R3, R5 ;
/*82d0*/ IADD3 R2, R5, 0x300402, RZ ;
// Destination address R16:R17 = c[0x0][0x1b8] + iw * 8.
/*82e0*/ LEA R16, P1, R10, c[0x0][0x1b8], 0x3 ;
/*82f0*/ FSETP.GEU.AND P0, PT, |R2|, 5.8789094863358348022e-39, PT ;
/*8300*/ LEA.HI.X R17, R10, c[0x0][0x1bc], R11, 0x3, P1 ;
/*8310*/ DFMA R6, -R4, R2, 1 ;
/*8320*/ DFMA R6, R6, R6, R6 ;
/*8330*/ DFMA R6, R2, R6, R2 ;
/*8340*/ DFMA R8, -R4, R6, 1 ;
/*8350*/ DFMA R2, R6, R8, R6 ;
/*8360*/ @P0 BRA `(.L_x_764) ;
// Slow path: R6 = (R5 & 0x7fffffff) - 0x100000 and R20:R21 = address of
// .L_x_76 (the instruction after the CALL) are set up for the helper; its
// result is taken from R4:R5 afterwards.
// NOTE(review): R20:R21 is presumably the return address expected by the
// NOINC call - the helper's ABI is not shown here.
/*8370*/ LOP3.LUT R6, R5, 0x7fffffff, RZ, 0xc0, !PT ;
/*8380*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_76@srel)) ;
/*8390*/ IADD3 R6, R6, -0x100000, RZ ;
/*83a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_76@srel)) ;
/*83b0*/ CALL.ABS.NOINC `(__cuda_sm20_dblrcp_rn_slowpath_v3) ;
.L_x_76:
/*83c0*/ MOV R2, R4 ;
/*83d0*/ MOV R3, R5 ;
.L_x_764:
// Store the reciprocal (R2:R3) to the address computed in R16:R17.
/*83e0*/ ULDC.64 UR4, c[0x0][0x118] ;
/*83f0*/ STG.E.64 [R16.64], R2 ;
// Loop tail: increment the 64-bit counter held in R32 (low) / R31 (high, via
// the carry in P0), synchronise all threads, and exit once the counter is
// >= the 64-bit bound in R30 (low) / R29 (high), compared unsigned.
// Otherwise branch to .L_x_765, which is outside this excerpt (presumably the
// head of the enclosing loop).
.L_x_763:
/*8400*/ IADD3 R32, P0, R32, 0x1, RZ ;
/*8410*/ WARPSYNC 0xffffffff ;
/*8420*/ BAR.SYNC 0x0 ;
/*8430*/ IMAD.X R31, RZ, RZ, R31, P0 ;
/*8440*/ ISETP.GE.U32.AND P0, PT, R32, R30, PT ;
/*8450*/ ISETP.GE.U32.AND.EX P0, PT, R31, R29, PT, P0 ;
/*8460*/ @P0 EXIT ;
/*8470*/ BRA `(.L_x_765) ;
// Out-of-line shuffle path: instead of an inline SHFL.DOWN, each 32-bit word is
// shuffled by calling the helper __cuda_sm70_shflsync_down. .L_x_623 is entered
// from code outside this excerpt.
// Setup for this first call: R4 = word to shuffle (R19), R5 = 0x10, R6 = 0x1f,
// R7 = -1, R20:R21 = address of .L_x_77 (the label right after the CALL).
// The result is read from R4 immediately after the call.
// NOTE(review): R5/R6/R7 are presumably lane delta, clamp and member mask
// (0x10 / 0x1f match the operands of the inline "SHFL.DOWN ..., 0x10, 0x1f"
// form), and R20:R21 presumably the return address - the helper's ABI is not
// visible here, confirm.
.L_x_623:
/*8480*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*8490*/ MOV R6, 0x1f ;
/*84a0*/ IMAD.MOV.U32 R5, RZ, RZ, 0x10 ;
/*84b0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_77@srel)) ;
/*84c0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*84d0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_77@srel)) ;
/*84e0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_77:
/*84f0*/ IMAD.MOV.U32 R39, RZ, RZ, R4 ;
/*8500*/ MOV R5, 0x10 ;
/*8510*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8520*/ MOV R4, R18 ;
/*8530*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*8540*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_78@srel)) ;
/*8550*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_78@srel)) ;
/*8560*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_78:
/*8570*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ;
/*8580*/ MOV R5, 0x10 ;
/*8590*/ IMAD.MOV.U32 R41, RZ, RZ, R39 ;
/*85a0*/ MOV R7, 0xffffffff ;
/*85b0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*85c0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_79@srel)) ;
/*85d0*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*85e0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_79@srel)) ;
/*85f0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_79:
/*8600*/ IMAD.MOV.U32 R39, RZ, RZ, R4 ;
/*8610*/ MOV R5, 0x10 ;
/*8620*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8630*/ MOV R4, R16 ;
/*8640*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*8650*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_80@srel)) ;
/*8660*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_80@srel)) ;
/*8670*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_80:
/*8680*/ IMAD.MOV.U32 R42, RZ, RZ, R4 ;
/*8690*/ MOV R5, 0x10 ;
/*86a0*/ IMAD.MOV.U32 R43, RZ, RZ, R39 ;
/*86b0*/ MOV R7, 0xffffffff ;
/*86c0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*86d0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_81@srel)) ;
/*86e0*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*86f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_81@srel)) ;
/*8700*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_81:
/*8710*/ IMAD.MOV.U32 R37, RZ, RZ, R4 ;
/*8720*/ MOV R5, 0x10 ;
/*8730*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8740*/ MOV R4, R36 ;
/*8750*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*8760*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_82@srel)) ;
/*8770*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_82@srel)) ;
/*8780*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_82:
/*8790*/ DADD R18, R18, R40 ;
/*87a0*/ IMAD.MOV.U32 R5, RZ, RZ, R37 ;
/*87b0*/ MOV R7, 0xffffffff ;
/*87c0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*87d0*/ DADD R40, R16, R42 ;
/*87e0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_83@srel)) ;
/*87f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_83@srel)) ;
/*8800*/ DADD R36, R4, R48 ;
/*8810*/ MOV R5, 0x8 ;
/*8820*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*8830*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_83:
/*8840*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8850*/ MOV R5, 0x8 ;
/*8860*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8870*/ MOV R7, 0xffffffff ;
/*8880*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*8890*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_84@srel)) ;
/*88a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_84@srel)) ;
/*88b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_84:
/*88c0*/ IMAD.MOV.U32 R42, RZ, RZ, R4 ;
/*88d0*/ MOV R5, 0x8 ;
/*88e0*/ IMAD.MOV.U32 R43, RZ, RZ, R17 ;
/*88f0*/ MOV R7, 0xffffffff ;
/*8900*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8910*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_85@srel)) ;
/*8920*/ IMAD.MOV.U32 R4, RZ, RZ, R41 ;
/*8930*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_85@srel)) ;
/*8940*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_85:
/*8950*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8960*/ MOV R5, 0x8 ;
/*8970*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8980*/ MOV R7, 0xffffffff ;
/*8990*/ IMAD.MOV.U32 R4, RZ, RZ, R40 ;
/*89a0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_86@srel)) ;
/*89b0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_86@srel)) ;
/*89c0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_86:
/*89d0*/ IMAD.MOV.U32 R48, RZ, RZ, R4 ;
/*89e0*/ MOV R5, 0x8 ;
/*89f0*/ IMAD.MOV.U32 R49, RZ, RZ, R17 ;
/*8a00*/ MOV R7, 0xffffffff ;
/*8a10*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8a20*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_87@srel)) ;
/*8a30*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*8a40*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_87@srel)) ;
/*8a50*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_87:
/*8a60*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8a70*/ MOV R5, 0x8 ;
/*8a80*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8a90*/ MOV R7, 0xffffffff ;
/*8aa0*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*8ab0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_88@srel)) ;
/*8ac0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_88@srel)) ;
/*8ad0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_88:
/*8ae0*/ DADD R18, R18, R42 ;
/*8af0*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*8b00*/ MOV R7, 0xffffffff ;
/*8b10*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8b20*/ DADD R40, R40, R48 ;
/*8b30*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_89@srel)) ;
/*8b40*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_89@srel)) ;
/*8b50*/ DADD R36, R36, R4 ;
/*8b60*/ MOV R5, 0x4 ;
/*8b70*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*8b80*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_89:
/*8b90*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8ba0*/ MOV R5, 0x4 ;
/*8bb0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8bc0*/ MOV R7, 0xffffffff ;
/*8bd0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*8be0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_90@srel)) ;
/*8bf0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_90@srel)) ;
/*8c00*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_90:
/*8c10*/ IMAD.MOV.U32 R42, RZ, RZ, R4 ;
/*8c20*/ MOV R5, 0x4 ;
/*8c30*/ IMAD.MOV.U32 R43, RZ, RZ, R17 ;
/*8c40*/ MOV R7, 0xffffffff ;
/*8c50*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8c60*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_91@srel)) ;
/*8c70*/ IMAD.MOV.U32 R4, RZ, RZ, R41 ;
/*8c80*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_91@srel)) ;
/*8c90*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_91:
/*8ca0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8cb0*/ MOV R5, 0x4 ;
/*8cc0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8cd0*/ MOV R7, 0xffffffff ;
/*8ce0*/ IMAD.MOV.U32 R4, RZ, RZ, R40 ;
/*8cf0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_92@srel)) ;
/*8d00*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_92@srel)) ;
/*8d10*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_92:
/*8d20*/ IMAD.MOV.U32 R48, RZ, RZ, R4 ;
/*8d30*/ MOV R5, 0x4 ;
/*8d40*/ IMAD.MOV.U32 R49, RZ, RZ, R17 ;
/*8d50*/ MOV R7, 0xffffffff ;
/*8d60*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8d70*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_93@srel)) ;
/*8d80*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*8d90*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_93@srel)) ;
/*8da0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_93:
/*8db0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8dc0*/ MOV R5, 0x4 ;
/*8dd0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8de0*/ MOV R7, 0xffffffff ;
/*8df0*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*8e00*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_94@srel)) ;
/*8e10*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_94@srel)) ;
/*8e20*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_94:
/*8e30*/ DADD R42, R18, R42 ;
/*8e40*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*8e50*/ MOV R7, 0xffffffff ;
/*8e60*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8e70*/ DADD R40, R40, R48 ;
/*8e80*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_95@srel)) ;
/*8e90*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_95@srel)) ;
/*8ea0*/ DADD R36, R36, R4 ;
/*8eb0*/ MOV R5, 0x2 ;
/*8ec0*/ IMAD.MOV.U32 R4, RZ, RZ, R43 ;
/*8ed0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_95:
/*8ee0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8ef0*/ MOV R5, 0x2 ;
/*8f00*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8f10*/ MOV R7, 0xffffffff ;
/*8f20*/ IMAD.MOV.U32 R4, RZ, RZ, R42 ;
/*8f30*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_96@srel)) ;
/*8f40*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_96@srel)) ;
/*8f50*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_96:
/*8f60*/ IMAD.MOV.U32 R48, RZ, RZ, R4 ;
/*8f70*/ MOV R5, 0x2 ;
/*8f80*/ IMAD.MOV.U32 R49, RZ, RZ, R17 ;
/*8f90*/ MOV R7, 0xffffffff ;
/*8fa0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8fb0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_97@srel)) ;
/*8fc0*/ IMAD.MOV.U32 R4, RZ, RZ, R41 ;
/*8fd0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_97@srel)) ;
/*8fe0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_97:
/*8ff0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*9000*/ MOV R5, 0x2 ;
/*9010*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9020*/ MOV R7, 0xffffffff ;
/*9030*/ IMAD.MOV.U32 R4, RZ, RZ, R40 ;
/*9040*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_98@srel)) ;
/*9050*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_98@srel)) ;
/*9060*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_98:
/*9070*/ IMAD.MOV.U32 R16, RZ, RZ, R4 ;
/*9080*/ MOV R5, 0x2 ;
/*9090*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*90a0*/ MOV R7, 0xffffffff ;
/*90b0*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*90c0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_99@srel)) ;
/*90d0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_99@srel)) ;
/*90e0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_99:
/*90f0*/ IMAD.MOV.U32 R19, RZ, RZ, R4 ;
/*9100*/ MOV R5, 0x2 ;
/*9110*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9120*/ MOV R7, 0xffffffff ;
/*9130*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*9140*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_100@srel)) ;
/*9150*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_100@srel)) ;
/*9160*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_100:
/*9170*/ DADD R42, R42, R48 ;
/*9180*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
/*9190*/ MOV R7, 0xffffffff ;
/*91a0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*91b0*/ DADD R16, R40, R16 ;
/*91c0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_101@srel)) ;
/*91d0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_101@srel)) ;
/*91e0*/ DADD R36, R36, R4 ;
/*91f0*/ MOV R5, 0x1 ;
/*9200*/ IMAD.MOV.U32 R4, RZ, RZ, R43 ;
/*9210*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_101:
/*9220*/ IMAD.MOV.U32 R19, RZ, RZ, R4 ;
/*9230*/ MOV R5, 0x1 ;
/*9240*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9250*/ MOV R4, R42 ;
/*9260*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*9270*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_102@srel)) ;
/*9280*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_102@srel)) ;
/*9290*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_102:
/*92a0*/ IMAD.MOV.U32 R18, RZ, RZ, R4 ;
/*92b0*/ MOV R5, 0x1 ;
/*92c0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*92d0*/ MOV R7, 0xffffffff ;
/*92e0*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*92f0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_103@srel)) ;
/*9300*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_103@srel)) ;
/*9310*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_103:
/*9320*/ IMAD.MOV.U32 R39, RZ, RZ, R4 ;
/*9330*/ MOV R5, 0x1 ;
/*9340*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9350*/ MOV R7, 0xffffffff ;
/*9360*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*9370*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_104@srel)) ;
/*9380*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_104@srel)) ;
/*9390*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_104:
/*93a0*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ;
/*93b0*/ MOV R5, 0x1 ;
/*93c0*/ IMAD.MOV.U32 R41, RZ, RZ, R39 ;
/*93d0*/ MOV R7, 0xffffffff ;
/*93e0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*93f0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_105@srel)) ;
/*9400*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*9410*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_105@srel)) ;
/*9420*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_105:
/*9430*/ IMAD.MOV.U32 R39, RZ, RZ, R4 ;
/*9440*/ MOV R5, 0x1 ;
/*9450*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9460*/ MOV R7, 0xffffffff ;
/*9470*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*9480*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_106@srel)) ;
/*9490*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_106@srel)) ;
/*94a0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_106:
/*94b0*/ BRA `(.L_x_766) ;
// .L_x_626: out-of-line segment that calls __cuda_sm70_shflsync_down six times,
// once per 32-bit source register, then branches to .L_x_767.
//   source -> destination of each result: R19->R41, R18->R40, R17->R43, R16->R42, R37->R44, R36->R2
// Register set-up repeated before every call (R5..R7 and R20/R21 are re-materialised each time):
//   R4      = 32-bit word handed to the helper; the result is read back from R4 after the call
//   R5      = R39 (not written anywhere in this segment)
//   R6      = 0x1f
//   R7      = 0xffffffff (written as -0x1 in the first group; same 32-bit pattern)
//   R20/R21 = 32@lo / 32@hi of the label that follows the CALL
// NOTE(review): by analogy with PTX shfl.sync.down, R5/R6/R7 are presumably lane delta / clamp /
// member mask, and R20:R21 the helper's return address -- confirm against the helper's body.
// NOTE(review): R19:R18, R17:R16 and R37:R36 look like hi/lo halves of three 64-bit values
// (the same pairs feed DADD in .L_x_663) -- confirm.
.L_x_626:
// Call 1: word in R19, result saved to R41 at .L_x_107.
/*94c0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*94d0*/ MOV R6, 0x1f ;
/*94e0*/ IMAD.MOV.U32 R5, RZ, RZ, R39 ;
/*94f0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_107@srel)) ;
/*9500*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*9510*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_107@srel)) ;
/*9520*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_107:
// Call 2: word in R18, result saved to R40 at .L_x_108.
/*9530*/ IMAD.MOV.U32 R41, RZ, RZ, R4 ;
/*9540*/ MOV R5, R39 ;
/*9550*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9560*/ MOV R7, 0xffffffff ;
/*9570*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*9580*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_108@srel)) ;
/*9590*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_108@srel)) ;
/*95a0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_108:
// Call 3: word in R17, result saved to R43 at .L_x_109.
/*95b0*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ;
/*95c0*/ MOV R5, R39 ;
/*95d0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*95e0*/ MOV R7, 0xffffffff ;
/*95f0*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*9600*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_109@srel)) ;
/*9610*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_109@srel)) ;
/*9620*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_109:
// Call 4: word in R16, result saved to R42 at .L_x_110.
/*9630*/ IMAD.MOV.U32 R43, RZ, RZ, R4 ;
/*9640*/ MOV R5, R39 ;
/*9650*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9660*/ MOV R7, 0xffffffff ;
/*9670*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*9680*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_110@srel)) ;
/*9690*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_110@srel)) ;
/*96a0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_110:
// Call 5: word in R37, result saved to R44 at .L_x_111.
/*96b0*/ IMAD.MOV.U32 R42, RZ, RZ, R4 ;
/*96c0*/ MOV R5, R39 ;
/*96d0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*96e0*/ MOV R7, 0xffffffff ;
/*96f0*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*9700*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_111@srel)) ;
/*9710*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_111@srel)) ;
/*9720*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_111:
// Call 6: word in R36, result saved to R2 at .L_x_112.
/*9730*/ IMAD.MOV.U32 R44, RZ, RZ, R4 ;
/*9740*/ MOV R5, R39 ;
/*9750*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9760*/ MOV R7, 0xffffffff ;
/*9770*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*9780*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_112@srel)) ;
/*9790*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_112@srel)) ;
/*97a0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_112:
// Last result goes to R2, then control leaves this segment for .L_x_767.
/*97b0*/ MOV R2, R4 ;
/*97c0*/ BRA `(.L_x_767) ;
// .L_x_635: out-of-line stub that calls __cuda_sm70_barrier_sync_count with R4 = 0x7,
// then branches to .L_x_634.
// R20/R21 receive the 32@lo / 32@hi halves of the address of .L_x_113, the label right after the CALL.
// NOTE(review): R4 = 0x7 is presumably the barrier id and R20:R21 the helper's return address;
// no other argument register is set in this stub, so any thread count must be live on entry -- confirm.
.L_x_635:
/*97d0*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*97e0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_113@srel)) ;
/*97f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_113@srel)) ;
/*9800*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_113:
/*9810*/ BRA `(.L_x_634) ;
// .L_x_642: out-of-line stub that calls __cuda_sm70_barrier_sync_count with R4 = 0x7,
// then branches to .L_x_641. Same instruction shape as the .L_x_635 stub; only the labels differ.
// R20/R21 receive the 32@lo / 32@hi halves of the address of .L_x_114, the label right after the CALL.
// NOTE(review): R4 = 0x7 is presumably the barrier id and R20:R21 the helper's return address;
// no other argument register is set in this stub, so any thread count must be live on entry -- confirm.
.L_x_642:
/*9820*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*9830*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_114@srel)) ;
/*9840*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_114@srel)) ;
/*9850*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_114:
/*9860*/ BRA `(.L_x_641) ;
// .L_x_645: out-of-line segment that calls __cuda_sm70_shflsync_down six times,
// once per 32-bit source register, then branches to .L_x_768.
//   source -> destination of each result: R19->R41, R18->R40, R37->R39, R36->R38, R17->R22,
//   R16-> left in R4 (the sixth result is not copied inside this segment).
// Register set-up repeated before every call (R5..R7 and R20/R21 are re-materialised each time):
//   R4      = 32-bit word handed to the helper; the result is read back from R4 after the call
//   R5      = R43 (not written anywhere in this segment)
//   R6      = 0x1f
//   R7      = 0xffffffff (written as -0x1 in the first group; same 32-bit pattern)
//   R20/R21 = 32@lo / 32@hi of the label that follows the CALL
// NOTE(review): by analogy with PTX shfl.sync.down, R5/R6/R7 are presumably lane delta / clamp /
// member mask, and R20:R21 the helper's return address -- confirm against the helper's body.
// NOTE(review): R19:R18, R37:R36 and R17:R16 look like hi/lo halves of three 64-bit values
// (the same pairs feed DADD in .L_x_663) -- confirm.
.L_x_645:
// Call 1: word in R19, result saved to R41 at .L_x_115.
/*9870*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*9880*/ MOV R6, 0x1f ;
/*9890*/ IMAD.MOV.U32 R5, RZ, RZ, R43 ;
/*98a0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_115@srel)) ;
/*98b0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*98c0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_115@srel)) ;
/*98d0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_115:
// Call 2: word in R18, result saved to R40 at .L_x_116.
/*98e0*/ IMAD.MOV.U32 R41, RZ, RZ, R4 ;
/*98f0*/ MOV R5, R43 ;
/*9900*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9910*/ MOV R7, 0xffffffff ;
/*9920*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*9930*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_116@srel)) ;
/*9940*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_116@srel)) ;
/*9950*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_116:
// Call 3: word in R37, result saved to R39 at .L_x_117.
/*9960*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ;
/*9970*/ MOV R5, R43 ;
/*9980*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9990*/ MOV R7, 0xffffffff ;
/*99a0*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*99b0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_117@srel)) ;
/*99c0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_117@srel)) ;
/*99d0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_117:
// Call 4: word in R36, result saved to R38 at .L_x_118.
/*99e0*/ IMAD.MOV.U32 R39, RZ, RZ, R4 ;
/*99f0*/ MOV R5, R43 ;
/*9a00*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9a10*/ MOV R7, 0xffffffff ;
/*9a20*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*9a30*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_118@srel)) ;
/*9a40*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_118@srel)) ;
/*9a50*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_118:
// Call 5: word in R17, result saved to R22 at .L_x_119.
/*9a60*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*9a70*/ MOV R5, R43 ;
/*9a80*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9a90*/ MOV R7, 0xffffffff ;
/*9aa0*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*9ab0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_119@srel)) ;
/*9ac0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_119@srel)) ;
/*9ad0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_119:
// Call 6: word in R16; the result stays in R4 when control leaves at .L_x_120.
/*9ae0*/ IMAD.MOV.U32 R22, RZ, RZ, R4 ;
/*9af0*/ MOV R5, R43 ;
/*9b00*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9b10*/ MOV R7, 0xffffffff ;
/*9b20*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*9b30*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_120@srel)) ;
/*9b40*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_120@srel)) ;
/*9b50*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_120:
/*9b60*/ BRA `(.L_x_768) ;
.L_x_663:
/*9b70*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*9b80*/ MOV R6, 0x1f ;
/*9b90*/ IMAD.MOV.U32 R5, RZ, RZ, 0x10 ;
/*9ba0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_121@srel)) ;
/*9bb0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*9bc0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_121@srel)) ;
/*9bd0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_121:
/*9be0*/ IMAD.MOV.U32 R23, RZ, RZ, R4 ;
/*9bf0*/ MOV R5, 0x10 ;
/*9c00*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9c10*/ MOV R4, R16 ;
/*9c20*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*9c30*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_122@srel)) ;
/*9c40*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_122@srel)) ;
/*9c50*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_122:
/*9c60*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*9c70*/ MOV R5, 0x10 ;
/*9c80*/ IMAD.MOV.U32 R39, RZ, RZ, R23 ;
/*9c90*/ MOV R7, 0xffffffff ;
/*9ca0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9cb0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_123@srel)) ;
/*9cc0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*9cd0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_123@srel)) ;
/*9ce0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_123:
/*9cf0*/ IMAD.MOV.U32 R23, RZ, RZ, R4 ;
/*9d00*/ MOV R5, 0x10 ;
/*9d10*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9d20*/ MOV R4, R18 ;
/*9d30*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*9d40*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_124@srel)) ;
/*9d50*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_124@srel)) ;
/*9d60*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_124:
/*9d70*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ;
/*9d80*/ MOV R5, 0x10 ;
/*9d90*/ IMAD.MOV.U32 R41, RZ, RZ, R23 ;
/*9da0*/ MOV R7, 0xffffffff ;
/*9db0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9dc0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_125@srel)) ;
/*9dd0*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*9de0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_125@srel)) ;
/*9df0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_125:
/*9e00*/ IMAD.MOV.U32 R23, RZ, RZ, R4 ;
/*9e10*/ MOV R5, 0x10 ;
/*9e20*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9e30*/ MOV R4, R36 ;
/*9e40*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*9e50*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_126@srel)) ;
/*9e60*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_126@srel)) ;
/*9e70*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_126:
/*9e80*/ DADD R38, R16, R38 ;
/*9e90*/ IMAD.MOV.U32 R5, RZ, RZ, R23 ;
/*9ea0*/ MOV R7, 0xffffffff ;
/*9eb0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9ec0*/ DADD R18, R18, R40 ;
/*9ed0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_127@srel)) ;
/*9ee0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_127@srel)) ;
/*9ef0*/ DADD R36, R4, R36 ;
/*9f00*/ MOV R5, 0x8 ;
/*9f10*/ IMAD.MOV.U32 R4, RZ, RZ, R39 ;
/*9f20*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_127:
/*9f30*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*9f40*/ MOV R5, 0x8 ;
/*9f50*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9f60*/ MOV R7, 0xffffffff ;
/*9f70*/ IMAD.MOV.U32 R4, RZ, RZ, R38 ;
/*9f80*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_128@srel)) ;
/*9f90*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_128@srel)) ;
/*9fa0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_128:
/*9fb0*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ;
/*9fc0*/ MOV R5, 0x8 ;
/*9fd0*/ IMAD.MOV.U32 R41, RZ, RZ, R17 ;
/*9fe0*/ MOV R7, 0xffffffff ;
/*9ff0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a000*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_129@srel)) ;
/*a010*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*a020*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_129@srel)) ;
/*a030*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_129:
/*a040*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a050*/ MOV R5, 0x8 ;
/*a060*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a070*/ MOV R7, 0xffffffff ;
/*a080*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*a090*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_130@srel)) ;
/*a0a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_130@srel)) ;
/*a0b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_130:
/*a0c0*/ IMAD.MOV.U32 R46, RZ, RZ, R4 ;
/*a0d0*/ MOV R5, 0x8 ;
/*a0e0*/ IMAD.MOV.U32 R47, RZ, RZ, R17 ;
/*a0f0*/ MOV R7, 0xffffffff ;
/*a100*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a110*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_131@srel)) ;
/*a120*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*a130*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_131@srel)) ;
/*a140*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_131:
/*a150*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a160*/ MOV R5, 0x8 ;
/*a170*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a180*/ MOV R7, 0xffffffff ;
/*a190*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*a1a0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_132@srel)) ;
/*a1b0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_132@srel)) ;
/*a1c0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_132:
/*a1d0*/ DADD R38, R38, R40 ;
/*a1e0*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*a1f0*/ MOV R7, 0xffffffff ;
/*a200*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a210*/ DADD R18, R18, R46 ;
/*a220*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_133@srel)) ;
/*a230*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_133@srel)) ;
/*a240*/ DADD R36, R36, R4 ;
/*a250*/ MOV R5, 0x4 ;
/*a260*/ IMAD.MOV.U32 R4, RZ, RZ, R39 ;
/*a270*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_133:
/*a280*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a290*/ MOV R5, 0x4 ;
/*a2a0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a2b0*/ MOV R7, 0xffffffff ;
/*a2c0*/ IMAD.MOV.U32 R4, RZ, RZ, R38 ;
/*a2d0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_134@srel)) ;
/*a2e0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_134@srel)) ;
/*a2f0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_134:
/*a300*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ;
/*a310*/ MOV R5, 0x4 ;
/*a320*/ IMAD.MOV.U32 R41, RZ, RZ, R17 ;
/*a330*/ MOV R7, 0xffffffff ;
/*a340*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a350*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_135@srel)) ;
/*a360*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*a370*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_135@srel)) ;
/*a380*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_135:
/*a390*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a3a0*/ MOV R5, 0x4 ;
/*a3b0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a3c0*/ MOV R7, 0xffffffff ;
/*a3d0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*a3e0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_136@srel)) ;
/*a3f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_136@srel)) ;
/*a400*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_136:
/*a410*/ IMAD.MOV.U32 R46, RZ, RZ, R4 ;
/*a420*/ MOV R5, 0x4 ;
/*a430*/ IMAD.MOV.U32 R47, RZ, RZ, R17 ;
/*a440*/ MOV R7, 0xffffffff ;
/*a450*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a460*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_137@srel)) ;
/*a470*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*a480*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_137@srel)) ;
/*a490*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_137:
/*a4a0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a4b0*/ MOV R5, 0x4 ;
/*a4c0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a4d0*/ MOV R7, 0xffffffff ;
/*a4e0*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*a4f0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_138@srel)) ;
/*a500*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_138@srel)) ;
/*a510*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_138:
/*a520*/ DADD R38, R38, R40 ;
/*a530*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*a540*/ MOV R7, 0xffffffff ;
/*a550*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a560*/ DADD R18, R18, R46 ;
/*a570*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_139@srel)) ;
/*a580*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_139@srel)) ;
/*a590*/ DADD R36, R36, R4 ;
/*a5a0*/ MOV R5, 0x2 ;
/*a5b0*/ IMAD.MOV.U32 R4, RZ, RZ, R39 ;
/*a5c0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_139:
/*a5d0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a5e0*/ MOV R5, 0x2 ;
/*a5f0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a600*/ MOV R7, 0xffffffff ;
/*a610*/ IMAD.MOV.U32 R4, RZ, RZ, R38 ;
/*a620*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_140@srel)) ;
/*a630*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_140@srel)) ;
/*a640*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_140:
/*a650*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ;
/*a660*/ MOV R5, 0x2 ;
/*a670*/ IMAD.MOV.U32 R41, RZ, RZ, R17 ;
/*a680*/ MOV R7, 0xffffffff ;
/*a690*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a6a0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_141@srel)) ;
/*a6b0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*a6c0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_141@srel)) ;
/*a6d0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_141:
/*a6e0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a6f0*/ MOV R5, 0x2 ;
/*a700*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a710*/ MOV R7, 0xffffffff ;
/*a720*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*a730*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_142@srel)) ;
/*a740*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_142@srel)) ;
/*a750*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_142:
/*a760*/ IMAD.MOV.U32 R46, RZ, RZ, R4 ;
/*a770*/ MOV R5, 0x2 ;
/*a780*/ IMAD.MOV.U32 R47, RZ, RZ, R17 ;
/*a790*/ MOV R7, 0xffffffff ;
/*a7a0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a7b0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_143@srel)) ;
/*a7c0*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*a7d0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_143@srel)) ;
/*a7e0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_143:
/*a7f0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a800*/ MOV R5, 0x2 ;
/*a810*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a820*/ MOV R7, 0xffffffff ;
/*a830*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*a840*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_144@srel)) ;
/*a850*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_144@srel)) ;
/*a860*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_144:
/*a870*/ DADD R38, R38, R40 ;
/*a880*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*a890*/ MOV R7, 0xffffffff ;
/*a8a0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a8b0*/ DADD R18, R18, R46 ;
/*a8c0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_145@srel)) ;
/*a8d0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_145@srel)) ;
/*a8e0*/ DADD R36, R36, R4 ;
/*a8f0*/ MOV R5, 0x1 ;
/*a900*/ IMAD.MOV.U32 R4, RZ, RZ, R39.reuse ;
/*a910*/ MOV R40, R38 ;
/*a920*/ IMAD.MOV.U32 R41, RZ, RZ, R39 ;
/*a930*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_145:
/*a940*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a950*/ MOV R5, 0x1 ;
/*a960*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a970*/ MOV R4, R38 ;
/*a980*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*a990*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_146@srel)) ;
/*a9a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_146@srel)) ;
/*a9b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_146:
/*a9c0*/ IMAD.MOV.U32 R16, RZ, RZ, R4 ;
/*a9d0*/ MOV R5, 0x1 ;
/*a9e0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a9f0*/ MOV R7, 0xffffffff ;
/*aa00*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*aa10*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_147@srel)) ;
/*aa20*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_147@srel)) ;
/*aa30*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_147:
/*aa40*/ IMAD.MOV.U32 R23, RZ, RZ, R4 ;
/*aa50*/ MOV R5, 0x1 ;
/*aa60*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*aa70*/ MOV R7, 0xffffffff ;
/*aa80*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*aa90*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_148@srel)) ;
/*aaa0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_148@srel)) ;
/*aab0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_148:
/*aac0*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*aad0*/ MOV R5, 0x1 ;
/*aae0*/ IMAD.MOV.U32 R39, RZ, RZ, R23 ;
/*aaf0*/ MOV R7, 0xffffffff ;
/*ab00*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ab10*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_149@srel)) ;
/*ab20*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*ab30*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_149@srel)) ;
/*ab40*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_149:
/*ab50*/ IMAD.MOV.U32 R23, RZ, RZ, R4 ;
/*ab60*/ MOV R5, 0x1 ;
/*ab70*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ab80*/ MOV R7, 0xffffffff ;
/*ab90*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*aba0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_150@srel)) ;
/*abb0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_150@srel)) ;
/*abc0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_150:
/*abd0*/ BRA `(.L_x_769) ;
/* .L_x_666: out-of-line block that makes six calls to the
   __cuda_sm70_shflsync_down helper, one per 32-bit register, and then
   branches to .L_x_770.
   Setup visible before every call:
     R4      = register passed in (R17, R16, R19, R18, R37, R36 in turn)
     R5      = R23
     R6      = 0x1f
     R7      = 0xffffffff (the first call writes it as -0x1, same bit pattern)
     R20/R21 = low/high 32 bits of the address of the label that directly
               follows the CALL
   After each call R4 is copied out: R41, R40, R39, R38, R42, R2.
   NOTE(review): presumably R4/R5/R6/R7 are the value / lane delta / clamp /
   member mask of a shfl.sync.down and R20:R21 is the address the helper
   returns to -- confirm against NVIDIA documentation. */
.L_x_666:
/*abe0*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*abf0*/ MOV R6, 0x1f ;
/*ac00*/ IMAD.MOV.U32 R5, RZ, RZ, R23 ;
/*ac10*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_151@srel)) ;
/*ac20*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*ac30*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_151@srel)) ;
/*ac40*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R41 <- R4 as left by the helper; next input register: R16 */
.L_x_151:
/*ac50*/ IMAD.MOV.U32 R41, RZ, RZ, R4 ;
/*ac60*/ MOV R5, R23 ;
/*ac70*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ac80*/ MOV R7, 0xffffffff ;
/*ac90*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*aca0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_152@srel)) ;
/*acb0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_152@srel)) ;
/*acc0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R40 <- R4 as left by the helper; next input register: R19 */
.L_x_152:
/*acd0*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ;
/*ace0*/ MOV R5, R23 ;
/*acf0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ad00*/ MOV R7, 0xffffffff ;
/*ad10*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*ad20*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_153@srel)) ;
/*ad30*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_153@srel)) ;
/*ad40*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R39 <- R4 as left by the helper; next input register: R18 */
.L_x_153:
/*ad50*/ IMAD.MOV.U32 R39, RZ, RZ, R4 ;
/*ad60*/ MOV R5, R23 ;
/*ad70*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ad80*/ MOV R7, 0xffffffff ;
/*ad90*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*ada0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_154@srel)) ;
/*adb0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_154@srel)) ;
/*adc0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R38 <- R4 as left by the helper; next input register: R37 */
.L_x_154:
/*add0*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*ade0*/ MOV R5, R23 ;
/*adf0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ae00*/ MOV R7, 0xffffffff ;
/*ae10*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*ae20*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_155@srel)) ;
/*ae30*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_155@srel)) ;
/*ae40*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R42 <- R4 as left by the helper; next input register: R36 */
.L_x_155:
/*ae50*/ IMAD.MOV.U32 R42, RZ, RZ, R4 ;
/*ae60*/ MOV R5, R23 ;
/*ae70*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ae80*/ MOV R7, 0xffffffff ;
/*ae90*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*aea0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_156@srel)) ;
/*aeb0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_156@srel)) ;
/*aec0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R2 <- R4 as left by the sixth call, then leave this block */
.L_x_156:
/*aed0*/ MOV R2, R4 ;
/*aee0*/ BRA `(.L_x_770) ;
/* .L_x_674: out-of-line call to the __cuda_sm70_barrier_sync_count helper.
   R4 = 0x7 is loaded before the call, R20/R21 receive the low/high 32 bits
   of the address of .L_x_157 (the label directly after the CALL), and
   control then branches to .L_x_673.
   NOTE(review): presumably R4 is the barrier id and R20:R21 the address the
   helper returns to -- confirm; no other argument is set up in this block. */
.L_x_674:
/*aef0*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*af00*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_157@srel)) ;
/*af10*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_157@srel)) ;
/*af20*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_157:
/*af30*/ BRA `(.L_x_673) ;
/* .L_x_681: out-of-line call to the __cuda_sm70_barrier_sync_count helper.
   R4 = 0x7 is loaded before the call, R20/R21 receive the low/high 32 bits
   of the address of .L_x_158 (the label directly after the CALL), and
   control then branches to .L_x_680.
   NOTE(review): presumably R4 is the barrier id and R20:R21 the address the
   helper returns to -- confirm; no other argument is set up in this block. */
.L_x_681:
/*af40*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*af50*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_158@srel)) ;
/*af60*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_158@srel)) ;
/*af70*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_158:
/*af80*/ BRA `(.L_x_680) ;
/* .L_x_685: out-of-line block that makes six calls to the
   __cuda_sm70_shflsync_down helper, one per 32-bit register, and then
   branches to .L_x_771.
   Setup visible before every call:
     R4      = register passed in (R19, R18, R23, R22, R17, R16 in turn)
     R5      = R41
     R6      = 0x1f
     R7      = 0xffffffff (the first call writes it as -0x1, same bit pattern)
     R20/R21 = low/high 32 bits of the address of the label that directly
               follows the CALL
   After the first five calls R4 is copied out to R39, R38, R37, R36, R34;
   the value left in R4 by the sixth call is not moved inside this block.
   NOTE(review): presumably R4/R5/R6/R7 are the value / lane delta / clamp /
   member mask of a shfl.sync.down and R20:R21 is the address the helper
   returns to -- confirm against NVIDIA documentation. */
.L_x_685:
/*af90*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*afa0*/ MOV R6, 0x1f ;
/*afb0*/ IMAD.MOV.U32 R5, RZ, RZ, R41 ;
/*afc0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_159@srel)) ;
/*afd0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*afe0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_159@srel)) ;
/*aff0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R39 <- R4 as left by the helper; next input register: R18 */
.L_x_159:
/*b000*/ IMAD.MOV.U32 R39, RZ, RZ, R4 ;
/*b010*/ MOV R5, R41 ;
/*b020*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b030*/ MOV R7, 0xffffffff ;
/*b040*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*b050*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_160@srel)) ;
/*b060*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_160@srel)) ;
/*b070*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R38 <- R4 as left by the helper; next input register: R23 */
.L_x_160:
/*b080*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*b090*/ MOV R5, R41 ;
/*b0a0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b0b0*/ MOV R7, 0xffffffff ;
/*b0c0*/ IMAD.MOV.U32 R4, RZ, RZ, R23 ;
/*b0d0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_161@srel)) ;
/*b0e0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_161@srel)) ;
/*b0f0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R37 <- R4 as left by the helper; next input register: R22 */
.L_x_161:
/*b100*/ IMAD.MOV.U32 R37, RZ, RZ, R4 ;
/*b110*/ MOV R5, R41 ;
/*b120*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b130*/ MOV R7, 0xffffffff ;
/*b140*/ IMAD.MOV.U32 R4, RZ, RZ, R22 ;
/*b150*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_162@srel)) ;
/*b160*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_162@srel)) ;
/*b170*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R36 <- R4 as left by the helper; next input register: R17 */
.L_x_162:
/*b180*/ IMAD.MOV.U32 R36, RZ, RZ, R4 ;
/*b190*/ MOV R5, R41 ;
/*b1a0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b1b0*/ MOV R7, 0xffffffff ;
/*b1c0*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*b1d0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_163@srel)) ;
/*b1e0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_163@srel)) ;
/*b1f0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R34 <- R4 as left by the helper; next input register: R16 */
.L_x_163:
/*b200*/ IMAD.MOV.U32 R34, RZ, RZ, R4 ;
/*b210*/ MOV R5, R41 ;
/*b220*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b230*/ MOV R7, 0xffffffff ;
/*b240*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*b250*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_164@srel)) ;
/*b260*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_164@srel)) ;
/*b270*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R4 from the sixth call is left in place for the code at .L_x_771 */
.L_x_164:
/*b280*/ BRA `(.L_x_771) ;
/* .L_x_703: out-of-line block that runs five rounds of two
   __cuda_sm70_shflsync_down calls each, with a DADD after each of the first
   four rounds, and then branches to .L_x_772.
   R5 passed to the helper per round: 0x10, 0x8, 0x4, 0x2, 0x1.
   R6 = 0x1f and R7 = 0xffffffff (sometimes written as -0x1) on every call;
   R20/R21 hold the low/high 32 bits of the address of the label that
   directly follows each CALL.
   Round shape: the odd register of the 64-bit operand (R17, later R19) goes
   through the helper first and its result is parked (R19, later R17); the
   even register (R16, later R18) goes second and its result stays in R4.
   The parked word is then moved to R5 and DADD adds R4:R5 into R18:R19
   (round 1 adds to R16:R17, later rounds to R18:R19). The setup of
   R6/R7/R20/R21 for the next call is scheduled ahead of each DADD.
   The fifth round only performs the two calls; no DADD follows inside this
   block.
   NOTE(review): this looks like a warp-level sum of one double via
   shuffle-down with halving lane offsets, and R4/R5/R6/R7 presumably map to
   value / delta / clamp / member mask -- confirm against the PTX shfl.sync
   documentation. */
.L_x_703:
/*b290*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*b2a0*/ MOV R6, 0x1f ;
/*b2b0*/ IMAD.MOV.U32 R5, RZ, RZ, 0x10 ;
/*b2c0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_165@srel)) ;
/*b2d0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*b2e0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_165@srel)) ;
/*b2f0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* round R5=0x10, second call: park first result in R19, pass R16 */
.L_x_165:
/*b300*/ IMAD.MOV.U32 R19, RZ, RZ, R4 ;
/*b310*/ MOV R5, 0x10 ;
/*b320*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b330*/ MOV R7, 0xffffffff ;
/*b340*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*b350*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_166@srel)) ;
/*b360*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_166@srel)) ;
/*b370*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R18:R19 = R16:R17 + R4:R5, then round R5=0x8, first call passes R19 */
.L_x_166:
/*b380*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
/*b390*/ MOV R7, 0xffffffff ;
/*b3a0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b3b0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_167@srel)) ;
/*b3c0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_167@srel)) ;
/*b3d0*/ DADD R18, R16, R4 ;
/*b3e0*/ MOV R5, 0x8 ;
/*b3f0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*b400*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* round R5=0x8, second call: park first result in R17, pass R18 */
.L_x_167:
/*b410*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*b420*/ MOV R5, 0x8 ;
/*b430*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b440*/ MOV R7, 0xffffffff ;
/*b450*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*b460*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_168@srel)) ;
/*b470*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_168@srel)) ;
/*b480*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R18:R19 += R4:R5, then round R5=0x4, first call passes R19 */
.L_x_168:
/*b490*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*b4a0*/ MOV R7, 0xffffffff ;
/*b4b0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b4c0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_169@srel)) ;
/*b4d0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_169@srel)) ;
/*b4e0*/ DADD R18, R18, R4 ;
/*b4f0*/ MOV R5, 0x4 ;
/*b500*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*b510*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* round R5=0x4, second call: park first result in R17, pass R18 */
.L_x_169:
/*b520*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*b530*/ MOV R5, 0x4 ;
/*b540*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b550*/ MOV R7, 0xffffffff ;
/*b560*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*b570*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_170@srel)) ;
/*b580*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_170@srel)) ;
/*b590*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R18:R19 += R4:R5, then round R5=0x2, first call passes R19 */
.L_x_170:
/*b5a0*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*b5b0*/ MOV R7, 0xffffffff ;
/*b5c0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b5d0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_171@srel)) ;
/*b5e0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_171@srel)) ;
/*b5f0*/ DADD R18, R18, R4 ;
/*b600*/ MOV R5, 0x2 ;
/*b610*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*b620*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* round R5=0x2, second call: park first result in R17, pass R18 */
.L_x_171:
/*b630*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*b640*/ MOV R5, 0x2 ;
/*b650*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b660*/ MOV R7, 0xffffffff ;
/*b670*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*b680*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_172@srel)) ;
/*b690*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_172@srel)) ;
/*b6a0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R18:R19 += R4:R5, then round R5=0x1, first call passes R19 */
.L_x_172:
/*b6b0*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*b6c0*/ MOV R7, 0xffffffff ;
/*b6d0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b6e0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_173@srel)) ;
/*b6f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_173@srel)) ;
/*b700*/ DADD R18, R18, R4 ;
/*b710*/ MOV R5, 0x1 ;
/*b720*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*b730*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* round R5=0x1, second call: park first result in R17, pass R18 */
.L_x_173:
/*b740*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*b750*/ MOV R5, 0x1 ;
/*b760*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b770*/ MOV R4, R18 ;
/*b780*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*b790*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_174@srel)) ;
/*b7a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_174@srel)) ;
/*b7b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* results of the last round remain in R17 and R4 for the code at .L_x_772 */
.L_x_174:
/*b7c0*/ BRA `(.L_x_772) ;
/* .L_x_705: out-of-line block that makes two calls to the
   __cuda_sm70_shflsync_down helper and then branches to .L_x_773.
   First call passes R17 in R4, second call passes R16 in R4; both use
   R5 = R19, R6 = 0x1f, R7 = 0xffffffff (first written as -0x1), and R20/R21
   hold the low/high 32 bits of the address of the label after the CALL.
   The first result is copied from R4 to R23; the second stays in R4.
   NOTE(review): presumably R4/R5/R6/R7 are the value / lane delta / clamp /
   member mask of a shfl.sync.down, and R16:R17 the two halves of one 64-bit
   value -- confirm. */
.L_x_705:
/*b7d0*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*b7e0*/ MOV R6, 0x1f ;
/*b7f0*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
/*b800*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_175@srel)) ;
/*b810*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*b820*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_175@srel)) ;
/*b830*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R23 <- R4 as left by the helper; next input register: R16 */
.L_x_175:
/*b840*/ IMAD.MOV.U32 R23, RZ, RZ, R4 ;
/*b850*/ MOV R5, R19 ;
/*b860*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b870*/ MOV R7, 0xffffffff ;
/*b880*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*b890*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_176@srel)) ;
/*b8a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_176@srel)) ;
/*b8b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_176:
/*b8c0*/ BRA `(.L_x_773) ;
/* .L_x_713: out-of-line call to the __cuda_sm70_barrier_sync_count helper.
   R4 = 0x7 is loaded before the call, R20/R21 receive the low/high 32 bits
   of the address of .L_x_177 (the label directly after the CALL), and
   control then branches to .L_x_712.
   NOTE(review): presumably R4 is the barrier id and R20:R21 the address the
   helper returns to -- confirm; no other argument is set up in this block. */
.L_x_713:
/*b8d0*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*b8e0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_177@srel)) ;
/*b8f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_177@srel)) ;
/*b900*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_177:
/*b910*/ BRA `(.L_x_712) ;
/* .L_x_718: out-of-line call to the __cuda_sm70_barrier_sync_count helper.
   R4 = 0x7 is loaded before the call, R20/R21 receive the low/high 32 bits
   of the address of .L_x_178 (the label directly after the CALL), and
   control then branches to .L_x_717.
   NOTE(review): presumably R4 is the barrier id and R20:R21 the address the
   helper returns to -- confirm; no other argument is set up in this block. */
.L_x_718:
/*b920*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*b930*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_178@srel)) ;
/*b940*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_178@srel)) ;
/*b950*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_178:
/*b960*/ BRA `(.L_x_717) ;
/* .L_x_721: out-of-line call to the __cuda_sm70_barrier_sync_count helper.
   R4 = 0x7 is loaded before the call, R20/R21 receive the low/high 32 bits
   of the address of .L_x_179 (the label directly after the CALL), and
   control then branches to .L_x_722.
   NOTE(review): presumably R4 is the barrier id and R20:R21 the address the
   helper returns to -- confirm; no other argument is set up in this block. */
.L_x_721:
/*b970*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*b980*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_179@srel)) ;
/*b990*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_179@srel)) ;
/*b9a0*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_179:
/*b9b0*/ BRA `(.L_x_722) ;
/* .L_x_725: out-of-line call to the __cuda_sm70_barrier_sync_count helper.
   R4 = 0x7 is loaded before the call, R20/R21 receive the low/high 32 bits
   of the address of .L_x_180 (the label directly after the CALL), and
   control then branches to .L_x_726.
   NOTE(review): presumably R4 is the barrier id and R20:R21 the address the
   helper returns to -- confirm; no other argument is set up in this block. */
.L_x_725:
/*b9c0*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*b9d0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_180@srel)) ;
/*b9e0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_180@srel)) ;
/*b9f0*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_180:
/*ba00*/ BRA `(.L_x_726) ;
/* .L_x_727: out-of-line block that makes two calls to the
   __cuda_sm70_shflsync_down helper and then branches to .L_x_774.
   First call passes R17 in R4, second call passes R16 in R4; both use
   R5 = R23, R6 = 0x1f, R7 = 0xffffffff (first written as -0x1), and R20/R21
   hold the low/high 32 bits of the address of the label after the CALL.
   The first result is copied from R4 to R19; the second stays in R4.
   NOTE(review): presumably R4/R5/R6/R7 are the value / lane delta / clamp /
   member mask of a shfl.sync.down, and R16:R17 the two halves of one 64-bit
   value -- confirm. */
.L_x_727:
/*ba10*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*ba20*/ MOV R6, 0x1f ;
/*ba30*/ IMAD.MOV.U32 R5, RZ, RZ, R23 ;
/*ba40*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_181@srel)) ;
/*ba50*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*ba60*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_181@srel)) ;
/*ba70*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
/* R19 <- R4 as left by the helper; next input register: R16 */
.L_x_181:
/*ba80*/ IMAD.MOV.U32 R19, RZ, RZ, R4 ;
/*ba90*/ MOV R5, R23 ;
/*baa0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bab0*/ MOV R7, 0xffffffff ;
/*bac0*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*bad0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_182@srel)) ;
/*bae0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_182@srel)) ;
/*baf0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_182:
/*bb00*/ BRA `(.L_x_774) ;
.L_x_741:
/*bb10*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*bb20*/ MOV R6, 0x1f ;
/*bb30*/ IMAD.MOV.U32 R5, RZ, RZ, 0x10 ;
/*bb40*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_183@srel)) ;
/*bb50*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*bb60*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_183@srel)) ;
/*bb70*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_183:
/*bb80*/ IMAD.MOV.U32 R19, RZ, RZ, R4 ;
/*bb90*/ MOV R5, 0x10 ;
/*bba0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bbb0*/ MOV R7, 0xffffffff ;
/*bbc0*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*bbd0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_184@srel)) ;
/*bbe0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_184@srel)) ;
/*bbf0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_184:
/*bc00*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
/*bc10*/ MOV R7, 0xffffffff ;
/*bc20*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bc30*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_185@srel)) ;
/*bc40*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_185@srel)) ;
/*bc50*/ DADD R18, R16, R4 ;
/*bc60*/ MOV R5, 0x8 ;
/*bc70*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*bc80*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_185:
/*bc90*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*bca0*/ MOV R5, 0x8 ;
/*bcb0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bcc0*/ MOV R7, 0xffffffff ;
/*bcd0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*bce0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_186@srel)) ;
/*bcf0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_186@srel)) ;
/*bd00*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_186:
/*bd10*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*bd20*/ MOV R7, 0xffffffff ;
/*bd30*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bd40*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_187@srel)) ;
/*bd50*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_187@srel)) ;
/*bd60*/ DADD R18, R18, R4 ;
/*bd70*/ MOV R5, 0x4 ;
/*bd80*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*bd90*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_187:
/*bda0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*bdb0*/ MOV R5, 0x4 ;
/*bdc0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bdd0*/ MOV R7, 0xffffffff ;
/*bde0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*bdf0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_188@srel)) ;
/*be00*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_188@srel)) ;
/*be10*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_188:
/*be20*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*be30*/ MOV R7, 0xffffffff ;
/*be40*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*be50*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_189@srel)) ;
/*be60*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_189@srel)) ;
/*be70*/ DADD R18, R18, R4 ;
/*be80*/ MOV R5, 0x2 ;
/*be90*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*bea0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_189:
/*beb0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*bec0*/ MOV R5, 0x2 ;
/*bed0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bee0*/ MOV R7, 0xffffffff ;
/*bef0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*bf00*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_190@srel)) ;
/*bf10*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_190@srel)) ;
/*bf20*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
// Last visible pair in a run of calls to __cuda_sm70_shflsync_down.
// Every call site loads R20/R21 with the lo/hi 32 bits of the address of the
// label that directly follows the CALL, and sets R6 = 0x1f and
// R7 = 0xffffffff (the -0x1 immediate below is the same 32-bit pattern).
// R5 is 0x1 for both calls in this span.
// NOTE(review): R4 looks like "value in / result out" and R5 like the lane
// delta, but the helper's register convention is not shown here - confirm.
.L_x_190:
// Form the pair R4:R5 from R4 as it stands after the preceding call and R17,
// which was saved after the call before that.
/*bf30*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*bf40*/ MOV R7, 0xffffffff ;
/*bf50*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bf60*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_191@srel)) ;
/*bf70*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_191@srel)) ;
// Double-precision add of the register pair R4:R5 into R18:R19.
/*bf80*/ DADD R18, R18, R4 ;
/*bf90*/ MOV R5, 0x1 ;
// First call of the pair is given R19, the upper register of R18:R19.
/*bfa0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*bfb0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_191:
// Keep what the call left in R4, then hand over R18 (lower register of the
// pair) for the second call.
/*bfc0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*bfd0*/ MOV R5, 0x1 ;
/*bfe0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bff0*/ MOV R4, R18 ;
/*c000*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*c010*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_192@srel)) ;
/*c020*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_192@srel)) ;
/*c030*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_192:
// No DADD follows this last call in this span; whatever consumes R4/R17 is
// at .L_x_775.
/*c040*/ BRA `(.L_x_775) ;
.L_x_743:
/*c050*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*c060*/ MOV R6, 0x1f ;
/*c070*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
/*c080*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_193@srel)) ;
/*c090*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*c0a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_193@srel)) ;
/*c0b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_193:
/*c0c0*/ IMAD.MOV.U32 R37, RZ, RZ, R4 ;
/*c0d0*/ MOV R5, R19 ;
/*c0e0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*c0f0*/ MOV R7, 0xffffffff ;
/*c100*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*c110*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_194@srel)) ;
/*c120*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_194@srel)) ;
/*c130*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_194:
/*c140*/ BRA `(.L_x_776) ;
.L_x_746:
/*c150*/ IMAD.MOV.U32 R5, RZ, RZ, R2 ;
/*c160*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_195@srel)) ;
/*c170*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*c180*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_195@srel)) ;
/*c190*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_195:
/*c1a0*/ BRA `(.L_x_777) ;
.L_x_751:
/*c1b0*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*c1c0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_196@srel)) ;
/*c1d0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_196@srel)) ;
/*c1e0*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_196:
/*c1f0*/ BRA `(.L_x_750) ;
.L_x_754:
/*c200*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*c210*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_197@srel)) ;
/*c220*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_197@srel)) ;
/*c230*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_197:
/*c240*/ BRA `(.L_x_755) ;
.L_x_758:
/*c250*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*c260*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_198@srel)) ;
/*c270*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_198@srel)) ;
/*c280*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_198:
/*c290*/ BRA `(.L_x_759) ;
.L_x_761:
/*c2a0*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*c2b0*/ MOV R6, 0x1f ;
/*c2c0*/ IMAD.MOV.U32 R5, RZ, RZ, R37 ;
/*c2d0*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_199@srel)) ;
/*c2e0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*c2f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_199@srel)) ;
/*c300*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_199:
/*c310*/ IMAD.MOV.U32 R19, RZ, RZ, R4 ;
/*c320*/ MOV R5, R37 ;
/*c330*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*c340*/ MOV R7, 0xffffffff ;
/*c350*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*c360*/ MOV R20, 32@lo((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_200@srel)) ;
/*c370*/ MOV R21, 32@hi((__omp_offloading_10301_25680f9__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_200@srel)) ;
/*c380*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_200:
/*c390*/ BRA `(.L_x_778) ;
// Unconditional branch to its own label, followed by 13 NOPs that run up to
// the next label. The instruction immediately before this label is an
// unconditional BRA, so this spot is not entered by fall-through.
// NOTE(review): looks like the usual end-of-kernel guard plus padding emitted
// by the assembler - confirm.
.L_x_779:
/*c3a0*/ BRA `(.L_x_779);
/*c3b0*/ NOP;
/*c3c0*/ NOP;
/*c3d0*/ NOP;
/*c3e0*/ NOP;
/*c3f0*/ NOP;
/*c400*/ NOP;
/*c410*/ NOP;
/*c420*/ NOP;
/*c430*/ NOP;
/*c440*/ NOP;
/*c450*/ NOP;
/*c460*/ NOP;
/*c470*/ NOP;
.L_x_976:
// .weak __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 // -- Begin function __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783
.weak .entry __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783(
.param .u64 __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_0,
.param .u64 __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_1,
.param .u64 __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_2,
.param .u64 __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_3,
.param .u64 __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_4,
.param .u64 __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_5,
.param .u64 __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_6,
.param .u64 __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_7,
.param .u64 __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_8,
.param .u64 __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_9,
.param .u64 __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_10,
.param .u64 __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_11
)
.maxntid 128, 1, 1 // @__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783
{
// 24-byte, 8-byte-aligned local buffer. %SPL is pointed at it in the entry
// block, and three f64 accumulators are spilled through that pointer after
// the accumulation loop.
.local .align 8 .b8 __local_depot4[24];
.reg .b64 %SP;
.reg .b64 %SPL;
// Virtual register banks used by the whole kernel body.
.reg .pred %p<209>;
.reg .b16 %rs<13>;
.reg .b32 %r<516>;
.reg .b64 %rd<488>;
.reg .f64 %fd<294>;
.loc 1 783 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:0
$L__func_begin4:
.loc 1 783 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:0
// Shared-memory slots below are filled by the thread with %tid.x == 0 and
// read by the other threads after a bar.sync.
// demoted variable
// Receives kernel parameter 1.
.shared .align 16 .u64 NumPtcls1_shared2_$_0;
// demoted variable
// Receives kernel parameter 2.
.shared .align 16 .u64 confgListOccup_ptr2_shared3_$_0;
// demoted variable
// Receives kernel parameter 3.
.shared .align 16 .u64 psiV_temp_list_ptr3_shared4_$_0;
// demoted variable
// Holds the current outer-loop index: 0 before the loop, then %r495 at the
// top of each iteration.
.shared .align 16 .u32 iw_shared5_$_0;
// demoted variable
// Receives kernel parameter 4.
.shared .align 16 .u64 psiV_list_devptr4_shared6_$_0;
// demoted variable
// 24 bytes, written as three 8-byte zero stores (offsets 0, 8, 16) at the
// top of every outer-loop iteration.
.shared .align 16 .b8 ratioGradRef_local_shared[24];
// demoted variable
// Receives kernel parameter 5.
.shared .align 16 .u64 psiMinv_temp_list_devptr5_shared_$_0;
// demoted variable
// Receives kernel parameter 6.
.shared .align 16 .u64 psiMinv_cols6_shared_$_0;
// demoted variable
// Receives kernel parameter 7. Declared .u32: it is written with
// st.shared.u32 from a 64-bit register and read back with ld.shared.s32
// into a 64-bit register.
.shared .align 16 .u32 WorkingIndex7_shared7_$_0;
// demoted variable
// Receives kernel parameter 8.
.shared .align 16 .u64 dpsiV_list_ptr8_shared_$_0;
// demoted variable
.shared .align 16 .f64 c_ratio_shared8_$_0;
// Entry block: point %SPL at the local depot, read the thread index, and let
// only the thread with %tid.x == 0 initialise the runtime state words.
// %p6 (tid.x != 0) and %r492 (constant 0) stay live and are reused by the
// later guarded regions.
// %bb.0: // %entry
mov.u64 %SPL, __local_depot4;
$L__tmp112:
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
mov.u32 %r1, %tid.x;
setp.ne.s32 %p6, %r1, 0;
mov.u32 %r492, 0;
// Every thread except thread 0 skips the stores below.
@%p6 bra $L__BB4_2;
// %bb.1: // %if.then.i.i
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u32 %r108, 1;
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
// IsSPMDMode = 1; TeamState words _$_0 and _$_1 = 0; TeamState word _$_2 = 1.
// NOTE(review): the meaning of the individual TeamState words is defined by
// the OpenMP device runtime, not by this listing - confirm before relying on it.
st.shared.u32 [IsSPMDMode], %r108;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r492;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r492;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r108;
$L__BB4_2: // %__kmpc_target_init.exit
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
ld.param.u64 %rd196, [__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_0];
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
bar.sync 0;
bar.sync 0;
$L__tmp113:
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
@%p6 bra $L__BB4_4;
// %bb.3: // %region.guarded.i
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
ld.param.u64 %rd204, [__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_8];
ld.param.u64 %rd203, [__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_7];
ld.param.u64 %rd202, [__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_6];
ld.param.u64 %rd201, [__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_5];
ld.param.u64 %rd200, [__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_4];
ld.param.u64 %rd199, [__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_3];
ld.param.u64 %rd198, [__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_2];
ld.param.u64 %rd197, [__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_1];
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
st.shared.u64 [NumPtcls1_shared2_$_0], %rd197;
st.shared.u64 [confgListOccup_ptr2_shared3_$_0], %rd198;
st.shared.u64 [psiV_temp_list_ptr3_shared4_$_0], %rd199;
st.shared.u64 [psiV_list_devptr4_shared6_$_0], %rd200;
st.shared.u64 [psiMinv_temp_list_devptr5_shared_$_0], %rd201;
st.shared.u64 [psiMinv_cols6_shared_$_0], %rd202;
st.shared.u32 [WorkingIndex7_shared7_$_0], %rd203;
st.shared.u64 [dpsiV_list_ptr8_shared_$_0], %rd204;
// Publish the initial outer-loop index and compute this CTA's iteration range.
//   N      = %r2   low 32 bits of kernel parameter 0 (%rd196)
//   q      = %r493 N / %nctaid.x           rem = %r6  N - q * %nctaid.x
//   lower  = %r495                         upper = %r12 (inclusive)
// CTAs with %ctaid.x <  rem: lower = (q + 1) * ctaid, upper = lower + q
// CTAs with %ctaid.x >= rem: lower = q * ctaid + rem, upper = lower + q - 1
// upper is then clamped to N - 1. The kernel returns (via $L__BB4_229) when
// N == 0 or when lower > upper.
$L__BB4_4: // %region.barrier.i
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
cvt.u32.u64 %r2, %rd196;
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
bar.sync 0;
.loc 1 785 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:36
// %p6 is (tid.x != 0): only thread 0 stores iw = 0 (%r492 holds 0).
@%p6 bra $L__BB4_6;
// %bb.5: // %region.guarded12.i
st.shared.u32 [iw_shared5_$_0], %r492;
$L__BB4_6: // %region.barrier10.i
bar.sync 0;
.loc 1 785 5 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:5
// Empty trip count: nothing to do.
setp.eq.s32 %p9, %r2, 0;
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
@%p9 bra $L__BB4_229;
// %bb.7: // %omp.precond.then.i
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
// %r3 = N - 1, the last valid iteration index.
add.s32 %r3, %r2, -1;
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
mov.u32 %r4, %ctaid.x;
mov.u32 %r111, %nctaid.x;
// q = N / nctaid, rem = N - q * nctaid (unsigned division).
div.u32 %r493, %r2, %r111;
mul.lo.s32 %r112, %r493, %r111;
sub.s32 %r6, %r2, %r112;
setp.le.u32 %p10, %r6, %r4;
@%p10 bra $L__BB4_9;
// %bb.8: // %if.then.i90.i.i.i
// ctaid < rem: this CTA takes one extra iteration; q is left unchanged so
// that lower + q is the inclusive upper bound.
add.s32 %r113, %r493, 1;
mul.lo.s32 %r495, %r113, %r4;
bra.uni $L__BB4_10;
$L__BB4_9: // %if.else.i79.i.i.i
// ctaid >= rem: start after the rem CTAs that took an extra iteration, and
// decrement q so that lower + q is again the inclusive upper bound.
mad.lo.s32 %r495, %r493, %r4, %r6;
add.s32 %r493, %r493, -1;
$L__BB4_10: // %__kmpc_distribute_static_init_4u.exit.i
add.s32 %r114, %r495, %r493;
.loc 1 785 10 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:10
// upper = min(lower + q, N - 1), unsigned.
min.u32 %r12, %r114, %r3;
.loc 1 785 5 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:5
setp.gt.u32 %p11, %r495, %r12;
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
// lower > upper: this CTA has no iterations, otherwise enter the loop setup.
@%p11 bra $L__BB4_229;
bra.uni $L__BB4_11;
$L__tmp114:
$L__BB4_229: // %__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_omp_outlined.exit
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
ret;
$L__BB4_11: // %region.check.tid18.lr.ph.i
ld.param.u64 %rd205, [__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_11];
ld.param.u64 %rd206, [__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_10];
ld.param.u64 %rd207, [__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783_param_9];
cvta.to.global.u64 %rd1, %rd205;
cvta.to.global.u64 %rd2, %rd206;
cvta.to.global.u64 %rd3, %rd207;
add.u64 %rd4, %SPL, 0;
mov.u32 %r491, 1;
mov.u32 %r13, %ntid.x;
add.s32 %r115, %r13, -1;
and.b32 %r14, %r115, -32;
shr.u32 %r15, %r1, 5;
cvt.u16.u32 %rs1, %r1;
and.b16 %rs2, %rs1, 31;
and.b32 %r16, %r1, 31;
shr.s32 %r116, %r1, 5;
add.s32 %r17, %r13, -32;
mul.wide.s32 %rd210, %r116, 4;
mov.u64 %rd211, __openmp_nvptx_data_transfer_temporary_storage;
add.s64 %rd6, %rd211, %rd210;
mul.wide.s32 %rd212, %r1, 4;
add.s64 %rd7, %rd211, %rd212;
mov.u64 %rd213, 0;
mov.f64 %fd143, 0d0000000000000000;
setp.ne.s32 %p81, %r1, %r14;
setp.ne.s32 %p86, %r16, 0;
mov.pred %p168, 0;
bra.uni $L__BB4_12;
// Latch of the outer loop headed by $L__BB4_12: CTA-wide barrier, advance the
// iteration index %r495 by one, and repeat while it is <= %r12 (the inclusive
// upper bound computed before the loop). Otherwise branch to the kernel's
// `ret` at $L__BB4_229.
$L__BB4_228: // %region.barrier30.i
// in Loop: Header=BB4_12 Depth=1
$L__tmp115:
.loc 1 805 33 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:33
bar.sync 0;
.loc 1 785 5 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:5
add.s32 %r495, %r495, 1;
// Unsigned compare, matching the unsigned bounds computed for this loop.
setp.le.u32 %p206, %r495, %r12;
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
@%p206 bra $L__BB4_12;
bra.uni $L__BB4_229;
// Outer-loop header (one pass per value of %r495). Thread 0 publishes the
// current index to iw_shared5 and then zeroes the three 8-byte slots of
// ratioGradRef_local_shared. Each store is guarded by %p6 (tid.x != 0, so
// only thread 0 executes it) and is followed by its own CTA-wide bar.sync 0.
// %rd213 holds the constant 0 set up before the loop.
$L__BB4_12: // %region.check.tid18.i
// =>This Loop Header: Depth=1
// Child Loop BB4_30 Depth 2
// Child Loop BB4_31 Depth 3
// Child Loop BB4_41 Depth 2
// Child Loop BB4_47 Depth 2
// Child Loop BB4_64 Depth 2
// Child Loop BB4_78 Depth 2
// Child Loop BB4_79 Depth 3
// Child Loop BB4_86 Depth 2
// Child Loop BB4_92 Depth 2
// Child Loop BB4_109 Depth 2
// Child Loop BB4_130 Depth 2
// Child Loop BB4_131 Depth 3
// Child Loop BB4_141 Depth 2
// Child Loop BB4_173 Depth 2
// Child Loop BB4_184 Depth 2
// Child Loop BB4_185 Depth 3
// Child Loop BB4_192 Depth 2
// Child Loop BB4_219 Depth 2
.loc 1 785 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:785:36
@%p6 bra $L__BB4_14;
// %bb.13: // %region.guarded17.i
// in Loop: Header=BB4_12 Depth=1
// iw = current outer-loop index.
st.shared.u32 [iw_shared5_$_0], %r495;
$L__BB4_14: // %region.barrier15.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
$L__tmp116:
.loc 2 83 12 // qmcpack/src/Containers/OhmmsPETE/TinyVector.h:83:12
@%p6 bra $L__BB4_16;
// %bb.15: // %region.guarded.i.i.i
// in Loop: Header=BB4_12 Depth=1
// Slot 0 (bytes 0..7) = 0.
st.shared.u64 [ratioGradRef_local_shared], %rd213;
$L__BB4_16: // %region.barrier.i.i.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
@%p6 bra $L__BB4_18;
// %bb.17: // %region.guarded.1.i.i.i
// in Loop: Header=BB4_12 Depth=1
// Slot 1 (bytes 8..15) = 0.
st.shared.u64 [ratioGradRef_local_shared+8], %rd213;
$L__BB4_18: // %region.barrier.1.i.i.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
@%p6 bra $L__BB4_20;
// %bb.19: // %region.guarded.2.i.i.i
// in Loop: Header=BB4_12 Depth=1
// Slot 2 (bytes 16..23) = 0.
st.shared.u64 [ratioGradRef_local_shared+16], %rd213;
$L__BB4_20: // %_ZN11qmcplusplus10TinyVectorIdLj3EEC1ERKd.internalized.exit.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
// Derive two thread counts from IsSPMDMode and %r13 (= %ntid.x):
//   %r119 = ntid.x - 32 when IsSPMDMode == 0, otherwise ntid.x
//   %r19  = 1 when %r119 < 32, otherwise %r119 rounded down to a multiple of 32
//   %r121 = %r19 when IsSPMDMode == 0, otherwise ntid.x
//   %r20  = 0 when %r121 == %r119, otherwise %r121
// %r20 is later stored into TeamState word _$_2 by thread 0. When
// IsSPMDMode == 0 control leaves for $L__BB4_74 instead of falling through.
$L__tmp117:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
ld.shared.u32 %r117, [IsSPMDMode];
// %p16 = (IsSPMDMode == 0)
setp.eq.s32 %p16, %r117, 0;
selp.b32 %r118, -32, 0, %p16;
add.s32 %r119, %r118, %r13;
setp.lt.u32 %p17, %r119, 32;
// Clearing the low five bits rounds down to a multiple of 32.
and.b32 %r120, %r119, -32;
selp.b32 %r19, 1, %r120, %p17;
selp.b32 %r121, %r19, %r13, %p16;
setp.eq.s32 %p18, %r121, %r119;
selp.b32 %r20, 0, %r121, %p18;
@%p16 bra $L__BB4_74;
$L__tmp118:
// %bb.21: // %if.then32.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
setp.ne.s32 %p19, %r1, 0;
$L__tmp119:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
bar.sync 0;
@%p19 bra $L__BB4_23;
// %bb.22: // %if.else.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r20;
mov.u32 %r122, 1;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r122;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r122;
$L__BB4_23: // %_ZN4ompx5state9ValueRAIIINS0_5ValueIjLNS0_9ValueKindE1EEEjEC1ERS4_jjbP7IdentTyb.exit.i.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
bar.sync 0;
add.s32 %r123, %r20, -1;
setp.lt.u32 %p20, %r123, %r1;
@%p20 bra $L__BB4_71;
// %bb.24: // %if.then40.i.i
// in Loop: Header=BB4_12 Depth=1
$L__tmp120:
.loc 1 789 30 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:30
ld.shared.u64 %rd216, [NumPtcls1_shared2_$_0];
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
setp.eq.s64 %p21, %rd216, 0;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p21 bra $L__BB4_71;
// %bb.25: // %omp.precond.then.i1409.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r21, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p22, %r21, 1;
mov.u32 %r496, %r491;
@%p22 bra $L__BB4_28;
// %bb.26: // %cond.false.i.i.i1497.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r496, [_ZN4ompx5state9TeamStateE_$_2];
setp.ne.s32 %p23, %r496, 0;
@%p23 bra $L__BB4_28;
// %bb.27: // %cond.false.i.i.i.i1499.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r125, [IsSPMDMode];
setp.eq.s32 %p24, %r125, 0;
selp.b32 %r126, -32, 0, %p24;
add.s32 %r496, %r126, %r13;
$L__BB4_28: // %__kmpc_for_static_init_8u.exit.i1411.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
add.s64 %rd8, %rd216, -1;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
ld.shared.u32 %r22, [_ZN4ompx5state9TeamStateE_$_1];
setp.eq.s32 %p25, %r21, 0;
setp.eq.s32 %p26, %r22, %r21;
selp.b32 %r127, %r1, 0, %p26;
selp.b32 %r128, 0, %r127, %p25;
cvt.s64.s32 %rd430, %r128;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
setp.lt.u64 %p27, %rd8, %rd430;
mov.f64 %fd248, 0d0000000000000000;
mov.f64 %fd247, %fd248;
mov.f64 %fd246, %fd248;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p27 bra $L__BB4_33;
// %bb.29: // %omp.inner.for.cond.preheader.lr.ph.i1419.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
cvt.s64.s32 %rd10, %r496;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
add.s64 %rd429, %rd430, 1;
$L__tmp121:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
ld.shared.u64 %rd217, [confgListOccup_ptr2_shared3_$_0];
ld.shared.u64 %rd218, [psiV_list_devptr4_shared6_$_0];
ld.shared.u32 %r129, [iw_shared5_$_0];
mul.wide.u32 %rd219, %r129, 8;
add.s64 %rd220, %rd218, %rd219;
ld.u64 %rd12, [%rd220];
ld.shared.u64 %rd221, [psiV_temp_list_ptr3_shared4_$_0];
add.s64 %rd222, %rd221, %rd219;
ld.u64 %rd223, [%rd222];
ld.shared.u64 %rd224, [psiMinv_temp_list_devptr5_shared_$_0];
add.s64 %rd225, %rd224, %rd219;
ld.u64 %rd226, [%rd225];
ld.shared.u64 %rd227, [psiMinv_cols6_shared_$_0];
ld.shared.s32 %rd228, [WorkingIndex7_shared7_$_0];
ld.shared.u64 %rd229, [dpsiV_list_ptr8_shared_$_0];
add.s64 %rd230, %rd229, %rd219;
ld.u64 %rd13, [%rd230];
$L__tmp122:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
mul.lo.s64 %rd231, %rd227, %rd430;
add.s64 %rd232, %rd231, %rd228;
shl.b64 %rd233, %rd232, 3;
add.s64 %rd428, %rd226, %rd233;
shl.b64 %rd15, %rd227, 3;
mul.lo.s64 %rd16, %rd15, %rd10;
shl.b64 %rd234, %rd430, 3;
add.s64 %rd427, %rd223, %rd234;
shl.b64 %rd18, %rd10, 3;
add.s64 %rd426, %rd217, %rd234;
mov.f64 %fd246, 0d0000000000000000;
mov.u64 %rd431, %rd430;
mov.f64 %fd247, %fd246;
mov.f64 %fd248, %fd246;
// Per-thread accumulation loops for the region at source line 788.
// Values set up before this point:
//   %rd431 = first index of the current chunk, %rd429 = one past its last index
//   %rd430 = inclusive upper index of the chunk, %rd8 = NumPtcls1 - 1
//   %rd10  = step between chunks (sign-extended %r496)
//   %rd426 -> confgListOccup_ptr2 + 8 * index   (64-bit entries)
//   %rd427 -> psiV_temp_list_ptr3[iw] + 8 * index
//   %rd428 -> psiMinv_temp_list_devptr5[iw] + 8 * (psiMinv_cols6 * index + WorkingIndex7)
//   %rd12  =  psiV_list_devptr4[iw],  %rd13 = dpsiV_list_ptr8[iw]
//   %rd15  =  8 * psiMinv_cols6,  %rd16 = %rd15 * %rd10,  %rd18 = 8 * %rd10
// %fd246/%fd247/%fd248 are the three running sums, zeroed before the loop.
$L__BB4_30: // %omp.inner.for.cond.preheader.i1424.i.i
// Parent Loop BB4_12 Depth=1
// => This Loop Header: Depth=2
// Child Loop BB4_31 Depth 3
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
// Copy the chunk-level pointers and index into cursors for the inner loop.
mov.u64 %rd432, %rd426;
mov.u64 %rd433, %rd427;
mov.u64 %rd434, %rd428;
mov.u64 %rd435, %rd431;
$L__BB4_31: // %omp.inner.for.body.i1425.i.i
// Parent Loop BB4_12 Depth=1
// Parent Loop BB4_30 Depth=2
// => This Inner Loop Header: Depth=3
.loc 1 791 26 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:791:26
// k = 64-bit entry read through the confgListOccup cursor.
ld.u64 %rd235, [%rd432];
.loc 1 792 37 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:792:37
// Load the f64 at %rd12 + 8 * k ...
shl.b64 %rd236, %rd235, 3;
add.s64 %rd237, %rd12, %rd236;
ld.f64 %fd86, [%rd237];
.loc 1 792 35 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:792:35
// ... and copy it to the psiV_temp cursor.
st.f64 [%rd433], %fd86;
.loc 1 793 95 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:793:95
// %rd239 = %rd13 + 24 * k: address of three consecutive f64 values.
mul.lo.s64 %rd238, %rd235, 24;
add.s64 %rd239, %rd13, %rd238;
.loc 1 793 93 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:793:93
// Scalar factor read through the psiMinv_temp cursor.
ld.f64 %fd87, [%rd434];
$L__tmp123:
.loc 3 297 34 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:297:34
ld.f64 %fd88, [%rd239];
.loc 3 297 51 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:297:51
ld.f64 %fd89, [%rd239+8];
.loc 3 297 68 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:297:68
ld.f64 %fd90, [%rd239+16];
$L__tmp124:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// sum[c] += component[c] * factor, for c = 0, 1, 2 (fused multiply-add).
fma.rn.f64 %fd246, %fd88, %fd87, %fd246;
$L__tmp125:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
fma.rn.f64 %fd247, %fd89, %fd87, %fd247;
$L__tmp126:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
fma.rn.f64 %fd248, %fd90, %fd87, %fd248;
$L__tmp127:
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// Advance by one index: the psiMinv_temp cursor moves by 8 * psiMinv_cols6
// bytes, the two 8-byte-entry cursors by 8 bytes each.
add.s64 %rd435, %rd435, 1;
add.s64 %rd434, %rd434, %rd15;
add.s64 %rd433, %rd433, 8;
add.s64 %rd432, %rd432, 8;
setp.lt.u64 %p28, %rd435, %rd429;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p28 bra $L__BB4_31;
// %bb.32: // %omp.dispatch.inc.i1433.i.i
// in Loop: Header=BB4_30 Depth=2
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// Move to the next chunk: first index += step, upper index =
// min(upper + step, NumPtcls1 - 1), end = upper + 1.
add.s64 %rd431, %rd431, %rd10;
add.s64 %rd240, %rd430, %rd10;
.loc 1 789 12 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:12
min.u64 %rd430, %rd240, %rd8;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
add.s64 %rd429, %rd430, 1;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// Chunk-level pointers advance by the same step, scaled to bytes.
add.s64 %rd428, %rd428, %rd16;
add.s64 %rd427, %rd427, %rd18;
add.s64 %rd426, %rd426, %rd18;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// Continue while the new first index is still inside the clamped range.
setp.lt.u64 %p29, %rd431, %rd429;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p29 bra $L__BB4_30;
$L__tmp128:
$L__BB4_33: // %omp.dispatch.end.i1439.i.i
// in Loop: Header=BB4_12 Depth=1
// Loop exit: spill the three partial sums to local memory at %rd4, then
// compute the thread count %r497 that selects the reduction path.
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
st.local.f64 [%rd4], %fd246;
st.local.f64 [%rd4+8], %fd247;
st.local.f64 [%rd4+16], %fd248;
// default count is 1; kept when %p22 (set outside this chunk) is true
mov.u32 %r497, 1;
$L__tmp129:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p22 bra $L__BB4_36;
// %bb.34: // %cond.false.i.i18.i.i.i
// in Loop: Header=BB4_12 Depth=1
// otherwise use team-state field _$_2 when it is non-zero
ld.shared.u32 %r497, [_ZN4ompx5state9TeamStateE_$_2];
setp.ne.s32 %p33, %r497, 0;
@%p33 bra $L__BB4_36;
// %bb.35: // %cond.false.i.i.i19.i.i.i
// in Loop: Header=BB4_12 Depth=1
// fallback: %r497 = %r13 - 32 when IsSPMDMode == 0, else %r13
// NOTE(review): %r13 is presumably the block's thread count - confirm upstream
ld.shared.u32 %r132, [IsSPMDMode];
setp.eq.s32 %p34, %r132, 0;
selp.b32 %r133, -32, 0, %p34;
add.s32 %r497, %r133, %r13;
$L__BB4_36: // %omp_get_num_threads.exit.i.i1445.i.i
// in Loop: Header=BB4_12 Depth=1
// a count of exactly 1 bypasses the shuffle reduction below
setp.eq.s32 %p35, %r497, 1;
@%p35 bra $L__BB4_70;
// %bb.37: // %if.end5.i.i.i1447.i.i
// in Loop: Header=BB4_12 Depth=1
// Bit-cast the three f64 accumulators to 64-bit integer registers for the
// shuffles. Mapping: %rd437 <- %fd246, %rd436 <- %fd247, %rd438 <- %fd248.
mov.b64 %rd438, %fd248;
mov.b64 %rd437, %fd246;
mov.b64 %rd436, %fd247;
// %r501 = (%r497 + 31) >> 5, i.e. ceil(%r497 / 32)
add.s32 %r30, %r497, 31;
shr.u32 %r501, %r30, 5;
// %r499 = %r497 & 31, the remainder modulo 32
and.b32 %r499, %r497, 31;
setp.ne.s32 %p36, %r499, 0;
add.s32 %r134, %r501, -1;
// take the partial path (BB4_39) only when the remainder is non-zero AND
// %r15 >= %r501 - 1 (unsigned); otherwise fall through to the unrolled path
// NOTE(review): %r15 is presumably this thread's warp index - confirm upstream
setp.ge.u32 %p37, %r15, %r134;
and.pred %p38, %p36, %p37;
@%p38 bra $L__BB4_39;
// %bb.38: // %for.body.i.i.preheader.i.i1492.i.i
// in Loop: Header=BB4_12 Depth=1
// First of five unrolled shuffle rounds (offsets 16, 8, 4, 2, 1). Each 64-bit
// value is split into two 32-bit halves, each half is fetched with
// shfl.sync.down (offset 16, clamp 31, member mask -1), the halves are
// re-packed, and the result is added to this lane's own partial sum.
$L__tmp130:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// component 0 (%rd437, bits of %fd246) -> %rd248
// begin inline asm
mov.b64 {%r148,%r149}, %rd437;
// end inline asm
shfl.sync.down.b32 %r151, %r149, 16, 31, -1;
shfl.sync.down.b32 %r150, %r148, 16, 31, -1;
// begin inline asm
mov.b64 %rd248, {%r150,%r151};
// end inline asm
// component 1 (%rd436, bits of %fd247) -> %rd250
// begin inline asm
mov.b64 {%r152,%r153}, %rd436;
// end inline asm
shfl.sync.down.b32 %r155, %r153, 16, 31, -1;
shfl.sync.down.b32 %r154, %r152, 16, 31, -1;
// begin inline asm
mov.b64 %rd250, {%r154,%r155};
// end inline asm
// component 2 (%rd438, bits of %fd248) -> %rd252
// begin inline asm
mov.b64 {%r156,%r157}, %rd438;
// end inline asm
shfl.sync.down.b32 %r159, %r157, 16, 31, -1;
shfl.sync.down.b32 %r158, %r156, 16, 31, -1;
// begin inline asm
mov.b64 %rd252, {%r158,%r159};
// end inline asm
mov.b64 %fd100, %rd250;
mov.b64 %fd101, %rd248;
$L__tmp131:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// own + received, per component; results feed the offset-8 round
add.f64 %fd102, %fd246, %fd101;
$L__tmp132:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd103, %fd247, %fd100;
$L__tmp133:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %fd104, %rd252;
$L__tmp134:
add.f64 %fd105, %fd248, %fd104;
$L__tmp135:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd253, %fd102;
// begin inline asm
mov.b64 {%r160,%r161}, %rd253;
// end inline asm
shfl.sync.down.b32 %r163, %r161, 8, 31, -1;
shfl.sync.down.b32 %r162, %r160, 8, 31, -1;
// begin inline asm
mov.b64 %rd254, {%r162,%r163};
// end inline asm
mov.b64 %rd255, %fd103;
// begin inline asm
mov.b64 {%r164,%r165}, %rd255;
// end inline asm
shfl.sync.down.b32 %r167, %r165, 8, 31, -1;
shfl.sync.down.b32 %r166, %r164, 8, 31, -1;
// begin inline asm
mov.b64 %rd256, {%r166,%r167};
// end inline asm
mov.b64 %rd257, %fd105;
// begin inline asm
mov.b64 {%r168,%r169}, %rd257;
// end inline asm
shfl.sync.down.b32 %r171, %r169, 8, 31, -1;
shfl.sync.down.b32 %r170, %r168, 8, 31, -1;
// begin inline asm
mov.b64 %rd258, {%r170,%r171};
// end inline asm
mov.b64 %fd106, %rd258;
mov.b64 %fd107, %rd256;
mov.b64 %fd108, %rd254;
$L__tmp136:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd109, %fd102, %fd108;
$L__tmp137:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd110, %fd103, %fd107;
$L__tmp138:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd111, %fd105, %fd106;
$L__tmp139:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd259, %fd109;
// begin inline asm
mov.b64 {%r172,%r173}, %rd259;
// end inline asm
shfl.sync.down.b32 %r175, %r173, 4, 31, -1;
shfl.sync.down.b32 %r174, %r172, 4, 31, -1;
// begin inline asm
mov.b64 %rd260, {%r174,%r175};
// end inline asm
mov.b64 %rd261, %fd110;
// begin inline asm
mov.b64 {%r176,%r177}, %rd261;
// end inline asm
shfl.sync.down.b32 %r179, %r177, 4, 31, -1;
shfl.sync.down.b32 %r178, %r176, 4, 31, -1;
// begin inline asm
mov.b64 %rd262, {%r178,%r179};
// end inline asm
mov.b64 %rd263, %fd111;
// begin inline asm
mov.b64 {%r180,%r181}, %rd263;
// end inline asm
shfl.sync.down.b32 %r183, %r181, 4, 31, -1;
shfl.sync.down.b32 %r182, %r180, 4, 31, -1;
// begin inline asm
mov.b64 %rd264, {%r182,%r183};
// end inline asm
mov.b64 %fd112, %rd264;
mov.b64 %fd113, %rd262;
mov.b64 %fd114, %rd260;
$L__tmp140:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd115, %fd109, %fd114;
$L__tmp141:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd116, %fd110, %fd113;
$L__tmp142:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd117, %fd111, %fd112;
$L__tmp143:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd265, %fd115;
// begin inline asm
mov.b64 {%r184,%r185}, %rd265;
// end inline asm
shfl.sync.down.b32 %r187, %r185, 2, 31, -1;
shfl.sync.down.b32 %r186, %r184, 2, 31, -1;
// begin inline asm
mov.b64 %rd266, {%r186,%r187};
// end inline asm
mov.b64 %rd267, %fd116;
// begin inline asm
mov.b64 {%r188,%r189}, %rd267;
// end inline asm
shfl.sync.down.b32 %r191, %r189, 2, 31, -1;
shfl.sync.down.b32 %r190, %r188, 2, 31, -1;
// begin inline asm
mov.b64 %rd268, {%r190,%r191};
// end inline asm
mov.b64 %rd269, %fd117;
// begin inline asm
mov.b64 {%r192,%r193}, %rd269;
// end inline asm
shfl.sync.down.b32 %r195, %r193, 2, 31, -1;
shfl.sync.down.b32 %r194, %r192, 2, 31, -1;
// begin inline asm
mov.b64 %rd270, {%r194,%r195};
// end inline asm
mov.b64 %fd118, %rd270;
mov.b64 %fd119, %rd268;
mov.b64 %fd120, %rd266;
$L__tmp144:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd121, %fd115, %fd120;
$L__tmp145:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd122, %fd116, %fd119;
$L__tmp146:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd123, %fd117, %fd118;
$L__tmp147:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd271, %fd121;
// begin inline asm
mov.b64 {%r196,%r197}, %rd271;
// end inline asm
shfl.sync.down.b32 %r199, %r197, 1, 31, -1;
shfl.sync.down.b32 %r198, %r196, 1, 31, -1;
// begin inline asm
mov.b64 %rd272, {%r198,%r199};
// end inline asm
mov.b64 %rd273, %fd122;
// begin inline asm
mov.b64 {%r200,%r201}, %rd273;
// end inline asm
shfl.sync.down.b32 %r203, %r201, 1, 31, -1;
shfl.sync.down.b32 %r202, %r200, 1, 31, -1;
// begin inline asm
mov.b64 %rd274, {%r202,%r203};
// end inline asm
mov.b64 %rd275, %fd123;
// begin inline asm
mov.b64 {%r204,%r205}, %rd275;
// end inline asm
shfl.sync.down.b32 %r207, %r205, 1, 31, -1;
shfl.sync.down.b32 %r206, %r204, 1, 31, -1;
// begin inline asm
mov.b64 %rd276, {%r206,%r207};
// end inline asm
mov.b64 %fd124, %rd276;
mov.b64 %fd125, %rd274;
mov.b64 %fd126, %rd272;
$L__tmp148:
// Final combine after the offset-1 shuffle: add the received halves to the
// running sums and spill all three results to local memory at %rd4.
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd246, %fd121, %fd126;
st.local.f64 [%rd4], %fd246;
$L__tmp149:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd247, %fd122, %fd125;
st.local.f64 [%rd4+8], %fd247;
$L__tmp150:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// NOTE(review): components 0 and 1 are written back to %fd246/%fd247, but
// component 2 lands in the temporary %fd127 and only reaches [%rd4+16];
// %fd248 keeps its pre-reduction value here. Whether later code reads %fd248
// or reloads from local memory is not visible in this chunk - confirm.
add.f64 %fd127, %fd123, %fd124;
st.local.f64 [%rd4+16], %fd127;
$L__tmp151:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
bra.uni $L__BB4_45;
$L__tmp152:
$L__BB4_74: // %if.end45.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.gt.u32 %p65, %r19, 1;
@%p65 bra $L__BB4_117;
bra.uni $L__BB4_75;
$L__BB4_117: // %__kmpc_begin_sharing_variables.exit.i.i
// in Loop: Header=BB4_12 Depth=1
// Reached when %r19 > 1 (see BB4_74). Writes team-state fields, waits on
// named barrier 8 twice, then writes the fields again with different values.
// %r20 and %r492 are set outside this chunk.
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r20;
mov.u32 %r350, 1;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r350;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r350;
mov.u32 %r348, 8;
// two back-to-back waits on barrier id 8
// NOTE(review): presumably these release and then rejoin worker threads - confirm against the device runtime
// begin inline asm
barrier.sync %r348;
// end inline asm
// begin inline asm
barrier.sync %r348;
// end inline asm
// after the barriers: fields _$_0 and _$_1 <- %r492, field _$_2 <- 1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r492;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r492;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r350;
bra.uni $L__BB4_118;
$L__BB4_75: // %if.else.i.i4468.i.i
// in Loop: Header=BB4_12 Depth=1
// Path taken when %r19 <= 1 (see BB4_74). Sets team-state field _$_0 to 1,
// then derives this thread's starting index and stride before the loop nest.
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u32 %r237, 1;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r237;
$L__tmp153:
.loc 1 789 30 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:30
// %rd284 = trip count read from the shared copy of NumPtcls
ld.shared.u64 %rd284, [NumPtcls1_shared2_$_0];
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
setp.eq.s64 %p66, %rd284, 0;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// zero trip count: skip the loop nest entirely
@%p66 bra $L__BB4_116;
// %bb.76: // %omp.precond.then.i7073.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
// %rd73 = last valid index (trip count - 1)
add.s64 %rd73, %rd284, -1;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
ld.shared.u32 %r238, [_ZN4ompx5state9TeamStateE_$_1];
ld.shared.u32 %r239, [_ZN4ompx5state9TeamStateE_$_2];
// stride %r240 = field _$_2, or %r17 when that field is 0
setp.eq.s32 %p67, %r239, 0;
selp.b32 %r240, %r17, %r239, %p67;
// start %r241 = %r1 when field _$_1 == 1, else 0
setp.eq.s32 %p68, %r238, 1;
selp.b32 %r241, %r1, 0, %p68;
cvt.s64.s32 %rd453, %r241;
cvt.s64.s32 %rd75, %r240;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// no work for this thread when last index < start (unsigned)
setp.lt.u64 %p69, %rd73, %rd453;
// accumulators take %fd143 (set outside this chunk) on the no-work path
mov.f64 %fd264, %fd143;
mov.f64 %fd263, %fd143;
mov.f64 %fd262, %fd143;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p69 bra $L__BB4_81;
// %bb.77: // %omp.inner.for.cond.preheader.lr.ph.i7086.i.i
// in Loop: Header=BB4_12 Depth=1
// Loop-nest preheader: fetch the captured pointers from shared memory, pick
// the per-iw entries, build the three cursors, and zero the accumulators.
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// %rd452 = start + 1, the initial exclusive limit for the inner loop
add.s64 %rd452, %rd453, 1;
$L__tmp154:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
ld.shared.u64 %rd285, [confgListOccup_ptr2_shared3_$_0];
ld.shared.u64 %rd286, [psiV_list_devptr4_shared6_$_0];
// %rd287 = iw * 8, the byte offset into each array of 64-bit pointers
ld.shared.u32 %r242, [iw_shared5_$_0];
mul.wide.u32 %rd287, %r242, 8;
// %rd77 = psiV_list_devptr[iw]
add.s64 %rd288, %rd286, %rd287;
ld.u64 %rd77, [%rd288];
// %rd291 = psiV_temp_list_ptr[iw]
ld.shared.u64 %rd289, [psiV_temp_list_ptr3_shared4_$_0];
add.s64 %rd290, %rd289, %rd287;
ld.u64 %rd291, [%rd290];
// %rd294 = psiMinv_temp_list_devptr[iw]
ld.shared.u64 %rd292, [psiMinv_temp_list_devptr5_shared_$_0];
add.s64 %rd293, %rd292, %rd287;
ld.u64 %rd294, [%rd293];
ld.shared.u64 %rd295, [psiMinv_cols6_shared_$_0];
// 32-bit signed WorkingIndex loaded into a 64-bit register
ld.shared.s32 %rd296, [WorkingIndex7_shared7_$_0];
// %rd78 = dpsiV_list_ptr[iw]
ld.shared.u64 %rd297, [dpsiV_list_ptr8_shared_$_0];
add.s64 %rd298, %rd297, %rd287;
ld.u64 %rd78, [%rd298];
$L__tmp155:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// %rd451 = %rd294 + (psiMinv_cols * start + WorkingIndex) * 8
mul.lo.s64 %rd299, %rd295, %rd453;
add.s64 %rd300, %rd299, %rd296;
shl.b64 %rd301, %rd300, 3;
add.s64 %rd451, %rd294, %rd301;
// %rd80 = psiMinv_cols * 8 (per-iteration byte step); %rd81 = %rd80 * stride (per-chunk step)
shl.b64 %rd80, %rd295, 3;
mul.lo.s64 %rd81, %rd80, %rd75;
// %rd450 = psiV_temp + start*8 ; %rd449 = confgListOccup + start*8
shl.b64 %rd302, %rd453, 3;
add.s64 %rd450, %rd291, %rd302;
// %rd83 = stride * 8, the per-chunk byte step for the two 8-byte cursors
shl.b64 %rd83, %rd75, 3;
add.s64 %rd449, %rd285, %rd302;
// zero the three accumulators; %rd454 tracks the current chunk start
mov.f64 %fd262, 0d0000000000000000;
mov.u64 %rd454, %rd453;
mov.f64 %fd263, %fd262;
mov.f64 %fd264, %fd262;
$L__BB4_78: // %omp.inner.for.cond.preheader.i7094.i.i
// Parent Loop BB4_12 Depth=1
// => This Loop Header: Depth=2
// Child Loop BB4_79 Depth 3
// Chunk header: copy the chunk-level cursors and start index into the
// working registers used by the inner loop.
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u64 %rd455, %rd449;
mov.u64 %rd456, %rd450;
mov.u64 %rd457, %rd451;
mov.u64 %rd458, %rd454;
$L__BB4_79: // %omp.inner.for.body.i7101.i.i
// Parent Loop BB4_12 Depth=1
// Parent Loop BB4_78 Depth=2
// => This Inner Loop Header: Depth=3
// Inner body: gather one f64 via an index read from the confgListOccup
// cursor, store it through the psiV_temp cursor, then accumulate a
// 3-component multiply-add into %fd262/%fd263/%fd264.
.loc 1 791 26 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:791:26
// %rd303 = 64-bit index read through cursor %rd455
ld.u64 %rd303, [%rd455];
.loc 1 792 37 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:792:37
// %fd145 = f64 at %rd77 + index*8 (%rd77 = psiV_list_devptr[iw])
shl.b64 %rd304, %rd303, 3;
add.s64 %rd305, %rd77, %rd304;
ld.f64 %fd145, [%rd305];
.loc 1 792 35 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:792:35
st.f64 [%rd456], %fd145;
.loc 1 793 95 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:793:95
// %rd307 = %rd78 + index*24 (%rd78 = dpsiV_list_ptr[iw]; 24 bytes = three f64)
mul.lo.s64 %rd306, %rd303, 24;
add.s64 %rd307, %rd78, %rd306;
.loc 1 793 93 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:793:93
// %fd146 = scalar factor read through the psiMinv_temp cursor
ld.f64 %fd146, [%rd457];
$L__tmp156:
.loc 3 297 34 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:297:34
ld.f64 %fd147, [%rd307];
.loc 3 297 51 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:297:51
ld.f64 %fd148, [%rd307+8];
.loc 3 297 68 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:297:68
ld.f64 %fd149, [%rd307+16];
$L__tmp157:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// acc[k] += component[k] * %fd146 for k = 0..2
fma.rn.f64 %fd262, %fd147, %fd146, %fd262;
$L__tmp158:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
fma.rn.f64 %fd263, %fd148, %fd146, %fd263;
$L__tmp159:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
fma.rn.f64 %fd264, %fd149, %fd146, %fd264;
$L__tmp160:
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// advance: counter +1, factor cursor by %rd80 bytes, the other two cursors by 8 bytes
add.s64 %rd458, %rd458, 1;
add.s64 %rd457, %rd457, %rd80;
add.s64 %rd456, %rd456, 8;
add.s64 %rd455, %rd455, 8;
// keep looping while counter < %rd452 (unsigned compare)
setp.lt.u64 %p70, %rd458, %rd452;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p70 bra $L__BB4_79;
// %bb.80: // %omp.dispatch.inc.i7125.i.i
// in Loop: Header=BB4_78 Depth=2
// Advance to the next chunk: step the chunk start and upper bound by the
// stride %rd75, clamp the upper bound to the last valid index %rd73, and
// move the three base cursors forward by their per-chunk byte steps.
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
add.s64 %rd454, %rd454, %rd75;
add.s64 %rd308, %rd453, %rd75;
.loc 1 789 12 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:12
// new upper bound = min(old upper bound + stride, last index), unsigned
min.u64 %rd453, %rd308, %rd73;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// %rd452 = upper bound + 1, the exclusive limit tested by the inner loop
add.s64 %rd452, %rd453, 1;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
add.s64 %rd451, %rd451, %rd81;
add.s64 %rd450, %rd450, %rd83;
add.s64 %rd449, %rd449, %rd83;
.loc 1 789 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:789:7
// run another chunk while the new start is below the exclusive limit
setp.lt.u64 %p71, %rd454, %rd452;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p71 bra $L__BB4_78;
$L__tmp161:
$L__BB4_81: // %omp.dispatch.end.i7131.i.i
// in Loop: Header=BB4_12 Depth=1
// Loop exit: spill the three partial sums to local memory at %rd4, then pick
// a reduction path from the stride %rd75, reused here as the thread count.
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
cvt.u32.u64 %r243, %rd75;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
st.local.f64 [%rd4], %fd262;
st.local.f64 [%rd4+8], %fd263;
st.local.f64 [%rd4+16], %fd264;
$L__tmp162:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// a count of exactly 1 bypasses the shuffle reduction
setp.eq.s32 %p72, %r243, 1;
@%p72 bra $L__BB4_115;
// %bb.82: // %if.end5.i.i.i7143.i.i
// in Loop: Header=BB4_12 Depth=1
// bit-cast accumulators: %rd460 <- %fd262, %rd459 <- %fd263, %rd461 <- %fd264
mov.b64 %rd461, %fd264;
mov.b64 %rd460, %fd262;
mov.b64 %rd459, %fd263;
// %r505 = (%r243 + 31) >> 5, i.e. ceil(%r243 / 32); %r503 = %r243 & 31
add.s32 %r45, %r243, 31;
shr.u32 %r505, %r45, 5;
and.b32 %r503, %r243, 31;
setp.ne.s32 %p73, %r503, 0;
add.s32 %r245, %r505, -1;
// partial path (BB4_84) only when the remainder is non-zero AND
// %r15 >= %r505 - 1 (unsigned); otherwise fall through to the unrolled path
setp.ge.u32 %p74, %r15, %r245;
and.pred %p75, %p73, %p74;
@%p75 bra $L__BB4_84;
// %bb.83: // %for.body.i.i.preheader.i.i7281.i.i
// in Loop: Header=BB4_12 Depth=1
$L__tmp163:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// begin inline asm
mov.b64 {%r259,%r260}, %rd460;
// end inline asm
shfl.sync.down.b32 %r262, %r260, 16, 31, -1;
shfl.sync.down.b32 %r261, %r259, 16, 31, -1;
// begin inline asm
mov.b64 %rd316, {%r261,%r262};
// end inline asm
// begin inline asm
mov.b64 {%r263,%r264}, %rd459;
// end inline asm
shfl.sync.down.b32 %r266, %r264, 16, 31, -1;
shfl.sync.down.b32 %r265, %r263, 16, 31, -1;
// begin inline asm
mov.b64 %rd318, {%r265,%r266};
// end inline asm
// begin inline asm
mov.b64 {%r267,%r268}, %rd461;
// end inline asm
shfl.sync.down.b32 %r270, %r268, 16, 31, -1;
shfl.sync.down.b32 %r269, %r267, 16, 31, -1;
// begin inline asm
mov.b64 %rd320, {%r269,%r270};
// end inline asm
mov.b64 %fd159, %rd318;
mov.b64 %fd160, %rd316;
$L__tmp164:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd161, %fd262, %fd160;
$L__tmp165:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd162, %fd263, %fd159;
$L__tmp166:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %fd163, %rd320;
$L__tmp167:
add.f64 %fd164, %fd264, %fd163;
$L__tmp168:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd321, %fd161;
// begin inline asm
mov.b64 {%r271,%r272}, %rd321;
// end inline asm
shfl.sync.down.b32 %r274, %r272, 8, 31, -1;
shfl.sync.down.b32 %r273, %r271, 8, 31, -1;
// begin inline asm
mov.b64 %rd322, {%r273,%r274};
// end inline asm
mov.b64 %rd323, %fd162;
// begin inline asm
mov.b64 {%r275,%r276}, %rd323;
// end inline asm
shfl.sync.down.b32 %r278, %r276, 8, 31, -1;
shfl.sync.down.b32 %r277, %r275, 8, 31, -1;
// begin inline asm
mov.b64 %rd324, {%r277,%r278};
// end inline asm
mov.b64 %rd325, %fd164;
// begin inline asm
mov.b64 {%r279,%r280}, %rd325;
// end inline asm
shfl.sync.down.b32 %r282, %r280, 8, 31, -1;
shfl.sync.down.b32 %r281, %r279, 8, 31, -1;
// begin inline asm
mov.b64 %rd326, {%r281,%r282};
// end inline asm
mov.b64 %fd165, %rd326;
mov.b64 %fd166, %rd324;
mov.b64 %fd167, %rd322;
$L__tmp169:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd168, %fd161, %fd167;
$L__tmp170:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd169, %fd162, %fd166;
$L__tmp171:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd170, %fd164, %fd165;
$L__tmp172:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd327, %fd168;
// begin inline asm
mov.b64 {%r283,%r284}, %rd327;
// end inline asm
shfl.sync.down.b32 %r286, %r284, 4, 31, -1;
shfl.sync.down.b32 %r285, %r283, 4, 31, -1;
// begin inline asm
mov.b64 %rd328, {%r285,%r286};
// end inline asm
mov.b64 %rd329, %fd169;
// begin inline asm
mov.b64 {%r287,%r288}, %rd329;
// end inline asm
shfl.sync.down.b32 %r290, %r288, 4, 31, -1;
shfl.sync.down.b32 %r289, %r287, 4, 31, -1;
// begin inline asm
mov.b64 %rd330, {%r289,%r290};
// end inline asm
mov.b64 %rd331, %fd170;
// begin inline asm
mov.b64 {%r291,%r292}, %rd331;
// end inline asm
shfl.sync.down.b32 %r294, %r292, 4, 31, -1;
shfl.sync.down.b32 %r293, %r291, 4, 31, -1;
// begin inline asm
mov.b64 %rd332, {%r293,%r294};
// end inline asm
mov.b64 %fd171, %rd332;
mov.b64 %fd172, %rd330;
mov.b64 %fd173, %rd328;
$L__tmp173:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd174, %fd168, %fd173;
$L__tmp174:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd175, %fd169, %fd172;
$L__tmp175:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd176, %fd170, %fd171;
$L__tmp176:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd333, %fd174;
// begin inline asm
mov.b64 {%r295,%r296}, %rd333;
// end inline asm
shfl.sync.down.b32 %r298, %r296, 2, 31, -1;
shfl.sync.down.b32 %r297, %r295, 2, 31, -1;
// begin inline asm
mov.b64 %rd334, {%r297,%r298};
// end inline asm
mov.b64 %rd335, %fd175;
// begin inline asm
mov.b64 {%r299,%r300}, %rd335;
// end inline asm
shfl.sync.down.b32 %r302, %r300, 2, 31, -1;
shfl.sync.down.b32 %r301, %r299, 2, 31, -1;
// begin inline asm
mov.b64 %rd336, {%r301,%r302};
// end inline asm
mov.b64 %rd337, %fd176;
// begin inline asm
mov.b64 {%r303,%r304}, %rd337;
// end inline asm
shfl.sync.down.b32 %r306, %r304, 2, 31, -1;
shfl.sync.down.b32 %r305, %r303, 2, 31, -1;
// begin inline asm
mov.b64 %rd338, {%r305,%r306};
// end inline asm
mov.b64 %fd177, %rd338;
mov.b64 %fd178, %rd336;
mov.b64 %fd179, %rd334;
$L__tmp177:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd180, %fd174, %fd179;
$L__tmp178:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd181, %fd175, %fd178;
$L__tmp179:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd182, %fd176, %fd177;
$L__tmp180:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd339, %fd180;
// begin inline asm
mov.b64 {%r307,%r308}, %rd339;
// end inline asm
shfl.sync.down.b32 %r310, %r308, 1, 31, -1;
shfl.sync.down.b32 %r309, %r307, 1, 31, -1;
// begin inline asm
mov.b64 %rd340, {%r309,%r310};
// end inline asm
mov.b64 %rd341, %fd181;
// begin inline asm
mov.b64 {%r311,%r312}, %rd341;
// end inline asm
shfl.sync.down.b32 %r314, %r312, 1, 31, -1;
shfl.sync.down.b32 %r313, %r311, 1, 31, -1;
// begin inline asm
mov.b64 %rd342, {%r313,%r314};
// end inline asm
mov.b64 %rd343, %fd182;
// begin inline asm
mov.b64 {%r315,%r316}, %rd343;
// end inline asm
shfl.sync.down.b32 %r318, %r316, 1, 31, -1;
shfl.sync.down.b32 %r317, %r315, 1, 31, -1;
// begin inline asm
mov.b64 %rd344, {%r317,%r318};
// end inline asm
mov.b64 %fd183, %rd344;
mov.b64 %fd184, %rd342;
mov.b64 %fd185, %rd340;
$L__tmp181:
// Final combine after the offset-1 shuffle: add the received halves to the
// running sums and spill all three results to local memory at %rd4.
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd262, %fd180, %fd185;
st.local.f64 [%rd4], %fd262;
$L__tmp182:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd263, %fd181, %fd184;
st.local.f64 [%rd4+8], %fd263;
$L__tmp183:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// NOTE(review): components 0 and 1 are written back to %fd262/%fd263, but
// component 2 lands in the temporary %fd186 and only reaches [%rd4+16];
// %fd264 keeps its pre-reduction value here. Whether later code reads %fd264
// or reloads from local memory is not visible in this chunk - confirm.
add.f64 %fd186, %fd182, %fd183;
st.local.f64 [%rd4+16], %fd186;
$L__tmp184:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
bra.uni $L__BB4_90;
$L__BB4_39: // %if.then15.i.i.i1459.i.i
// in Loop: Header=BB4_12 Depth=1
// Partial-warp path: %r499 (= %r497 & 31) is the remaining size to reduce.
// A size below 2 needs no shuffling.
setp.lt.u32 %p39, %r499, 2;
@%p39 bra $L__BB4_45;
// %bb.40: // %while.body.lr.ph.i.i.i.i1461.i.i
// in Loop: Header=BB4_12 Depth=1
// first shuffle offset = size / 2
shr.u32 %r498, %r499, 1;
bra.uni $L__BB4_41;
$L__BB4_43: // %_omp_reduction_shuffle_and_reduce_func.exit.i.i.i.i
// in Loop: Header=BB4_41 Depth=2
// Loop latch: next size = (size + 1) >> 1, next offset = (size + 1) >> 2.
// The continue test uses the size from before the update.
add.s32 %r147, %r499, 1;
shr.u32 %r36, %r147, 1;
shr.u32 %r498, %r147, 2;
setp.gt.u32 %p41, %r499, 2;
// carry this round's results into the registers the next round shuffles from
mov.u64 %rd436, %rd440;
mov.u64 %rd437, %rd439;
mov.u64 %rd438, %rd441;
mov.u32 %r499, %r36;
@%p41 bra $L__BB4_41;
bra.uni $L__BB4_44;
$L__BB4_41: // %while.body.i.i.i.i1464.i.i
// Parent Loop BB4_12 Depth=1
// => This Inner Loop Header: Depth=2
// One round of the partial-warp reduction: fetch each 64-bit component from
// the lane %r498 positions higher (two 32-bit shuffles per value, clamp 31,
// member mask -1), then add it to the own value only when %rs2 < offset.
// %rs2 is set outside this chunk.
// NOTE(review): %rs2 is presumably this thread's lane id - confirm upstream
cvt.u16.u32 %rs5, %r498;
$L__tmp185:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// component held in %rd437 -> received copy in %rd439
// begin inline asm
mov.b64 {%r135,%r136}, %rd437;
// end inline asm
shfl.sync.down.b32 %r138, %r136, %r498, 31, -1;
shfl.sync.down.b32 %r137, %r135, %r498, 31, -1;
// begin inline asm
mov.b64 %rd439, {%r137,%r138};
// end inline asm
// component held in %rd436 -> received copy in %rd440
// begin inline asm
mov.b64 {%r139,%r140}, %rd436;
// end inline asm
shfl.sync.down.b32 %r142, %r140, %r498, 31, -1;
shfl.sync.down.b32 %r141, %r139, %r498, 31, -1;
// begin inline asm
mov.b64 %rd440, {%r141,%r142};
// end inline asm
// component held in %rd438 -> received copy in %rd441
// begin inline asm
mov.b64 {%r143,%r144}, %rd438;
// end inline asm
shfl.sync.down.b32 %r146, %r144, %r498, 31, -1;
shfl.sync.down.b32 %r145, %r143, %r498, 31, -1;
// begin inline asm
mov.b64 %rd441, {%r145,%r146};
// end inline asm
// when %rs2 >= offset, skip the add; the latch then carries the received
// values forward unchanged
setp.ge.u16 %p40, %rs2, %rs5;
@%p40 bra $L__BB4_43;
// %bb.42: // %then.i22.i.i.i.i
// in Loop: Header=BB4_41 Depth=2
// otherwise replace each received value with received + own
mov.b64 %fd91, %rd438;
mov.b64 %fd92, %rd436;
mov.b64 %fd93, %rd437;
mov.b64 %fd94, %rd441;
mov.b64 %fd95, %rd440;
mov.b64 %fd96, %rd439;
$L__tmp186:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd97, %fd96, %fd93;
$L__tmp187:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd439, %fd97;
$L__tmp188:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd98, %fd95, %fd92;
$L__tmp189:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd440, %fd98;
$L__tmp190:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd99, %fd94, %fd91;
mov.b64 %rd441, %fd99;
bra.uni $L__BB4_43;
$L__tmp191:
$L__BB4_84: // %if.then15.i.i.i7155.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.lt.u32 %p76, %r503, 2;
@%p76 bra $L__BB4_90;
// %bb.85: // %while.body.lr.ph.i.i.i.i7157.i.i
// in Loop: Header=BB4_12 Depth=1
shr.u32 %r502, %r503, 1;
bra.uni $L__BB4_86;
$L__BB4_88: // %_omp_reduction_shuffle_and_reduce_func.exit.i.i7171.i.i
// in Loop: Header=BB4_86 Depth=2
add.s32 %r258, %r503, 1;
shr.u32 %r51, %r258, 1;
shr.u32 %r502, %r258, 2;
setp.gt.u32 %p78, %r503, 2;
mov.u64 %rd459, %rd463;
mov.u64 %rd460, %rd462;
mov.u64 %rd461, %rd464;
mov.u32 %r503, %r51;
@%p78 bra $L__BB4_86;
bra.uni $L__BB4_89;
$L__BB4_86: // %while.body.i.i.i.i7160.i.i
// Parent Loop BB4_12 Depth=1
// => This Inner Loop Header: Depth=2
cvt.u16.u32 %rs7, %r502;
$L__tmp192:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// begin inline asm
mov.b64 {%r246,%r247}, %rd460;
// end inline asm
shfl.sync.down.b32 %r249, %r247, %r502, 31, -1;
shfl.sync.down.b32 %r248, %r246, %r502, 31, -1;
// begin inline asm
mov.b64 %rd462, {%r248,%r249};
// end inline asm
// begin inline asm
mov.b64 {%r250,%r251}, %rd459;
// end inline asm
shfl.sync.down.b32 %r253, %r251, %r502, 31, -1;
shfl.sync.down.b32 %r252, %r250, %r502, 31, -1;
// begin inline asm
mov.b64 %rd463, {%r252,%r253};
// end inline asm
// begin inline asm
mov.b64 {%r254,%r255}, %rd461;
// end inline asm
shfl.sync.down.b32 %r257, %r255, %r502, 31, -1;
shfl.sync.down.b32 %r256, %r254, %r502, 31, -1;
// begin inline asm
mov.b64 %rd464, {%r256,%r257};
// end inline asm
setp.ge.u16 %p77, %rs2, %rs7;
@%p77 bra $L__BB4_88;
// %bb.87: // %then.i22.i.i7277.i.i
// in Loop: Header=BB4_86 Depth=2
mov.b64 %fd150, %rd461;
mov.b64 %fd151, %rd459;
mov.b64 %fd152, %rd460;
mov.b64 %fd153, %rd464;
mov.b64 %fd154, %rd463;
mov.b64 %fd155, %rd462;
$L__tmp193:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd156, %fd155, %fd152;
$L__tmp194:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd462, %fd156;
$L__tmp195:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd157, %fd154, %fd151;
$L__tmp196:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd463, %fd157;
$L__tmp197:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd158, %fd153, %fd150;
mov.b64 %rd464, %fd158;
bra.uni $L__BB4_88;
$L__tmp198:
$L__BB4_44: // %if.end22.i.i.loopexit.i.i.i
// in Loop: Header=BB4_12 Depth=1
// Exit of the partial-warp loop: spill the three reduced 64-bit values to
// local memory at %rd4 and refresh the f64 registers from them.
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
st.local.u64 [%rd4], %rd439;
st.local.u64 [%rd4+8], %rd440;
$L__tmp199:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
mov.b64 %fd247, %rd440;
mov.b64 %fd246, %rd439;
$L__tmp200:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// NOTE(review): %rd441 is stored to [%rd4+16], but unlike %fd246/%fd247
// there is no matching mov into %fd248, so %fd248 still holds the
// pre-reduction value. Its consumers are outside this chunk - confirm.
st.local.u64 [%rd4+16], %rd441;
$L__tmp201:
$L__BB4_45: // %if.end22.i.i.i1471.i.i
// in Loop: Header=BB4_12 Depth=1
// Join point after the intra-warp reduction. Computes %r26 and decides
// whether the copy loop at BB4_47 is needed.
// %p25, %r14 and %rd213 are set outside this chunk.
.loc 1 0 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:59
setp.eq.s32 %p32, %r1, %r14;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// %r26 = %r1, except 0 when both %r1 == %r14 and %p25 hold
selp.b32 %r131, 0, %r1, %p25;
selp.b32 %r26, %r131, %r1, %p32;
// with fewer than 33 threads, skip ahead to BB4_68
setp.lt.u32 %p42, %r497, 33;
@%p42 bra $L__BB4_68;
// %bb.46: // %if.then25.i.i.i1475.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
// %rd442 = starting byte offset for the word-by-word copy loop
mov.u64 %rd442, %rd213;
bra.uni $L__BB4_47;
$L__BB4_61: // %ifcont4.i59.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
// Latch of the copy loop: advance the byte offset by one 32-bit word and
// repeat until it reaches 24, the size of the three f64 values at %rd4.
$L__tmp202:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
add.s64 %rd442, %rd442, 4;
cvt.u32.u64 %r220, %rd442;
setp.ne.s32 %p57, %r220, 24;
@%p57 bra $L__BB4_47;
bra.uni $L__BB4_62;
$L__BB4_47: // %body.i.i.i.i.i
// Parent Loop BB4_12 Depth=1
// => This Inner Loop Header: Depth=2
// Copy-loop body, first half: run a barrier chosen from the runtime state,
// then (unless %p86) publish one 32-bit word of the local reduction buffer
// to shared memory. %p81, %p86, %r17 and %rd6 are set outside this chunk.
ld.shared.u32 %r208, [IsSPMDMode];
setp.ne.s32 %p43, %r208, 0;
@%p43 bra $L__BB4_50;
bra.uni $L__BB4_48;
$L__BB4_50: // %if.then2.i.i53.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
// IsSPMDMode != 0: use barrier 0
bar.sync 0;
bra.uni $L__BB4_52;
$L__BB4_48: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i.i73.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
// IsSPMDMode == 0: choose by team-state field _$_0 and %p81
ld.shared.u32 %r38, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p45, %r38, 0;
or.pred %p46, %p81, %p45;
@%p46 bra $L__BB4_51;
// %bb.49: // %if.then.i.i83.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
// field _$_0 == 0 and %p81 false: memory fence only, no thread barrier
membar.gl;
bra.uni $L__BB4_52;
$L__BB4_51: // %if.else.i.i.i.i76.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
// named barrier 7; participant count %r210 is
// (field _$_2, or %r17 when that field is 0) when field _$_0 == 1, else 1
setp.eq.s32 %p47, %r38, 1;
ld.shared.u32 %r211, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p48, %r211, 0;
selp.b32 %r212, %r17, %r211, %p48;
selp.b32 %r210, %r212, 1, %p47;
mov.u32 %r209, 7;
// begin inline asm
barrier.sync %r209, %r210;
// end inline asm
$L__BB4_52: // %__kmpc_barrier.exit.i54.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
// %rd56 = address of the current 32-bit word inside the local buffer at %rd4
add.s64 %rd56, %rd4, %rd442;
@%p86 bra $L__BB4_54;
// %bb.53: // %then.i72.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
// publish this word to the shared slot at %rd6
// NOTE(review): presumably only one lane per warp gets here - confirm how %p86 is computed
ld.local.u32 %r213, [%rd56];
st.volatile.shared.u32 [%rd6], %r213;
$L__BB4_54: // %ifcont.i55.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
ld.shared.u32 %r214, [IsSPMDMode];
setp.ne.s32 %p50, %r214, 0;
@%p50 bra $L__BB4_57;
bra.uni $L__BB4_55;
$L__BB4_57: // %if.then2.i2.i57.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
bar.sync 0;
bra.uni $L__BB4_59;
$L__BB4_55: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i3.i61.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
ld.shared.u32 %r39, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p52, %r39, 0;
or.pred %p53, %p81, %p52;
@%p53 bra $L__BB4_58;
// %bb.56: // %if.then.i17.i71.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
membar.gl;
bra.uni $L__BB4_59;
$L__BB4_58: // %if.else.i.i.i9.i64.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
setp.eq.s32 %p54, %r39, 1;
ld.shared.u32 %r217, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p55, %r217, 0;
selp.b32 %r218, %r17, %r217, %p55;
selp.b32 %r216, %r218, 1, %p54;
mov.u32 %r215, 7;
// begin inline asm
barrier.sync %r215, %r216;
// end inline asm
$L__tmp203:
$L__BB4_59: // %__kmpc_barrier.exit18.i58.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
// Copy-loop body, second half (after the second barrier): threads with
// %r1 < %r501 read one 32-bit word back from the shared slot at %rd7 into
// the local buffer; all others go straight to the latch.
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.ge.u32 %p56, %r1, %r501;
$L__tmp204:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
@%p56 bra $L__BB4_61;
// %bb.60: // %then2.i60.i.i.i.i
// in Loop: Header=BB4_47 Depth=2
// overwrite the current word of the local buffer with the shared value
ld.volatile.shared.u32 %r219, [%rd7];
st.local.u32 [%rd56], %r219;
bra.uni $L__BB4_61;
$L__tmp205:
$L__BB4_62: // %_omp_reduction_inter_warp_copy_func.exit.i.i1476.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:59
setp.gt.u32 %p58, %r1, 31;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.lt.u32 %p59, %r30, 64;
or.pred %p60, %p58, %p59;
@%p60 bra $L__BB4_68;
// %bb.63: // %while.body.lr.ph.i49.i.i.i1480.i.i
// in Loop: Header=BB4_12 Depth=1
shr.u32 %r500, %r30, 6;
cvt.u16.u32 %rs3, %r26;
$L__tmp206:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
ld.local.u64 %rd445, [%rd4+16];
$L__tmp207:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
ld.local.u64 %rd444, [%rd4];
ld.local.u64 %rd443, [%rd4+8];
bra.uni $L__BB4_64;
$L__tmp208:
// Shuffle-and-reduce loop over three 64-bit values carried in %rd444, %rd443
// and %rd445. The loop header is $L__BB4_64 further down; this first block
// ($L__BB4_66) is the loop latch, laid out ahead of the header.
//
// Latch: compute the next round's parameters from the current count %r501.
//   %r43  = (%r501 + 1) >> 1   -> becomes the new %r501
//   %r500 = (%r501 + 1) >> 2   -> next shuffle distance
// The loop repeats only while the *old* %r501 was greater than 2.
$L__BB4_66: // %_omp_reduction_shuffle_and_reduce_func.exit233.i.i.i.i
// in Loop: Header=BB4_64 Depth=2
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
add.s32 %r234, %r501, 1;
shr.u32 %r43, %r234, 1;
shr.u32 %r500, %r234, 2;
setp.gt.u32 %p62, %r501, 2;
// Carry this round's results into the registers the header reads.
mov.u64 %rd443, %rd447;
mov.u64 %rd444, %rd446;
mov.u64 %rd445, %rd448;
mov.u32 %r501, %r43;
@%p62 bra $L__BB4_64;
bra.uni $L__BB4_67;
// Loop header: one shuffle round.
$L__BB4_64: // %while.body.i52.i.i.i1484.i.i
// Parent Loop BB4_12 Depth=1
// => This Inner Loop Header: Depth=2
// %rs6  = low 16 bits of the shuffle distance, used by the compare below.
// %r233 = the same low 16 bits sign-extended to 32 bits, used as the shfl offset.
cvt.u16.u32 %rs6, %r500;
cvt.s32.s16 %r233, %r500;
$L__tmp209:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// Each 64-bit value is split into two 32-bit halves, each half is fetched from
// the lane %r233 positions further down with shfl.sync.down (operand 31 is the
// clamp/segment word, -1 is the member mask naming all 32 lanes), and the two
// halves are re-packed. Value 1 of 3: %rd444 -> %rd446.
// begin inline asm
mov.b64 {%r221,%r222}, %rd444;
// end inline asm
shfl.sync.down.b32 %r224, %r222, %r233, 31, -1;
shfl.sync.down.b32 %r223, %r221, %r233, 31, -1;
// begin inline asm
mov.b64 %rd446, {%r223,%r224};
// end inline asm
// Value 2 of 3: %rd443 -> %rd447.
// begin inline asm
mov.b64 {%r225,%r226}, %rd443;
// end inline asm
shfl.sync.down.b32 %r228, %r226, %r233, 31, -1;
shfl.sync.down.b32 %r227, %r225, %r233, 31, -1;
// begin inline asm
mov.b64 %rd447, {%r227,%r228};
// end inline asm
// Value 3 of 3: %rd445 -> %rd448.
// begin inline asm
mov.b64 {%r229,%r230}, %rd445;
// end inline asm
shfl.sync.down.b32 %r232, %r230, %r233, 31, -1;
shfl.sync.down.b32 %r231, %r229, %r233, 31, -1;
// begin inline asm
mov.b64 %rd448, {%r231,%r232};
// end inline asm
// Threads with %rs3 >= shuffle distance skip the add and keep the shuffled
// values as-is. %rs3 is the 16-bit truncation of %r26 taken before the loop;
// presumably %r26 is this thread's lane index -- TODO confirm against its
// definition outside this excerpt.
setp.ge.u16 %p61, %rs3, %rs6;
@%p61 bra $L__BB4_66;
// %bb.65: // %then.i245.i.i.i.i
// in Loop: Header=BB4_64 Depth=2
// Reinterpret the six 64-bit patterns as f64 and add element-wise:
// shuffled value + this thread's own value, written back over the shuffled
// registers (%rd446, %rd447, %rd448).
mov.b64 %fd128, %rd445;
mov.b64 %fd129, %rd443;
mov.b64 %fd130, %rd444;
mov.b64 %fd131, %rd448;
mov.b64 %fd132, %rd447;
mov.b64 %fd133, %rd446;
$L__tmp210:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd134, %fd133, %fd130;
$L__tmp211:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd446, %fd134;
$L__tmp212:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd135, %fd132, %fd129;
$L__tmp213:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd447, %fd135;
$L__tmp214:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd136, %fd131, %fd128;
mov.b64 %rd448, %fd136;
bra.uni $L__BB4_66;
$L__tmp215:
$L__BB4_67: // %if.end29.i.i.loopexit.i.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
mov.b64 %fd247, %rd447;
mov.b64 %fd246, %rd446;
$L__tmp216:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
st.local.u64 [%rd4+16], %rd448;
$L__tmp217:
// Final step of the reduction on this path: fold the per-thread partial
// (%fd246, %fd247, %fd248) into the three f64 slots of
// ratioGradRef_local_shared, then synchronise before continuing at
// $L__BB4_118.
// Only threads with %r26 == 0 perform the accumulation; all others branch
// straight to the barrier at $L__BB4_71.
$L__BB4_68: // %if.end29.i.i.i1473.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.ne.s32 %p63, %r26, 0;
@%p63 bra $L__BB4_71;
// %bb.69: // %if.end29.i.i..omp.reduction.then_crit_edge.i.i.i
// in Loop: Header=BB4_12 Depth=1
$L__tmp218:
.loc 3 112 5 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:112:5
// Third component is re-read from the local slot at [%rd4+16]; the first two
// (%fd246, %fd247) arrive in registers from the predecessor blocks.
ld.local.f64 %fd248, [%rd4+16];
$L__BB4_70: // %.omp.reduction.then.i1474.i.i
// in Loop: Header=BB4_12 Depth=1
$L__tmp219:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// Plain (non-atomic) load / add / store on shared memory, one slot at a time:
// slot +0 += %fd246
ld.shared.f64 %fd137, [ratioGradRef_local_shared];
add.f64 %fd138, %fd137, %fd246;
st.shared.f64 [ratioGradRef_local_shared], %fd138;
$L__tmp220:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// slot +8 += %fd247
ld.shared.f64 %fd139, [ratioGradRef_local_shared+8];
add.f64 %fd140, %fd139, %fd247;
st.shared.f64 [ratioGradRef_local_shared+8], %fd140;
$L__tmp221:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// slot +16 += %fd248
ld.shared.f64 %fd141, [ratioGradRef_local_shared+16];
add.f64 %fd142, %fd141, %fd248;
st.shared.f64 [ratioGradRef_local_shared+16], %fd142;
$L__tmp222:
$L__BB4_71: // %if.end41.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// Barrier 0 is executed by both the accumulating threads and the ones that
// skipped, so the shared update above precedes everything after this point.
bar.sync 0;
// Threads with %p19 set skip the TeamState writes. %p19 is defined outside
// this excerpt -- presumably it selects every thread except one; TODO confirm.
@%p19 bra $L__BB4_73;
// %bb.72: // %if.then.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
// Write %r492 into TeamState fields _0 and _1 and the constant 1 into field _2.
// NOTE(review): looks like the team state being restored after the parallel
// region; %r492's value is set outside this excerpt.
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r492;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r492;
mov.u32 %r236, 1;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r236;
$L__BB4_73: // %_ZN4ompx5state9ValueRAIIINS0_5ValueIjLNS0_9ValueKindE7EEEjED1Ev.exit.i.i
// in Loop: Header=BB4_12 Depth=1
// Two further block-wide barriers, then continue at $L__BB4_118, which reads
// ratioGradRef_local_shared.
bar.sync 0;
bar.sync 0;
bra.uni $L__BB4_118;
$L__BB4_89: // %if.end22.i.i.loopexit.i7179.i.i
// in Loop: Header=BB4_12 Depth=1
$L__tmp223:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
st.local.u64 [%rd4], %rd462;
st.local.u64 [%rd4+8], %rd463;
$L__tmp224:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
mov.b64 %fd263, %rd463;
mov.b64 %fd262, %rd462;
$L__tmp225:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
st.local.u64 [%rd4+16], %rd464;
$L__tmp226:
$L__BB4_90: // %if.end22.i.i.i7180.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.lt.u32 %p79, %r243, 33;
@%p79 bra $L__BB4_113;
// %bb.91: // %if.then25.i.i.i7200.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u64 %rd465, 0;
bra.uni $L__BB4_92;
// Inter-warp copy loop. It walks the local buffer at %rd4 in 4-byte steps
// (%rd465 is the byte offset) and stops when the offset reaches 24, i.e. it
// moves six 32-bit words. Each iteration is:
//   barrier -> selected threads publish one word to shared memory ->
//   barrier -> selected threads read a word back into the same local slot.
// The loop header is $L__BB4_92; this first block ($L__BB4_106) is the latch.
$L__BB4_106: // %ifcont4.i59.i.i7215.i.i
// in Loop: Header=BB4_92 Depth=2
$L__tmp227:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
add.s64 %rd465, %rd465, 4;
cvt.u32.u64 %r332, %rd465;
setp.ne.s32 %p94, %r332, 24;
@%p94 bra $L__BB4_92;
bra.uni $L__BB4_107;
// First barrier of the iteration. Three-way dispatch:
//   IsSPMDMode != 0                         -> bar.sync 0
//   else if !(%p81 || TeamState._0 != 0)    -> membar.gl only
//   else                                    -> barrier.sync 7 with a thread count
$L__BB4_92: // %body.i.i.i7206.i.i
// Parent Loop BB4_12 Depth=1
// => This Inner Loop Header: Depth=2
ld.shared.u32 %r320, [IsSPMDMode];
setp.ne.s32 %p80, %r320, 0;
@%p80 bra $L__BB4_95;
bra.uni $L__BB4_93;
$L__BB4_95: // %if.then2.i.i53.i.i7209.i.i
// in Loop: Header=BB4_92 Depth=2
bar.sync 0;
bra.uni $L__BB4_97;
$L__BB4_93: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i.i73.i.i7266.i.i
// in Loop: Header=BB4_92 Depth=2
ld.shared.u32 %r53, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p82, %r53, 0;
or.pred %p83, %p81, %p82;
@%p83 bra $L__BB4_96;
// %bb.94: // %if.then.i.i83.i.i7276.i.i
// in Loop: Header=BB4_92 Depth=2
membar.gl;
bra.uni $L__BB4_97;
$L__BB4_96: // %if.else.i.i.i.i76.i.i7269.i.i
// in Loop: Header=BB4_92 Depth=2
// Thread count for the named barrier:
//   %r324 = (TeamState._2 == 0) ? %r17 : TeamState._2
//   %r322 = (TeamState._0 == 1) ? %r324 : 1
setp.eq.s32 %p84, %r53, 1;
ld.shared.u32 %r323, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p85, %r323, 0;
selp.b32 %r324, %r17, %r323, %p85;
selp.b32 %r322, %r324, 1, %p84;
mov.u32 %r321, 7;
// begin inline asm
barrier.sync %r321, %r322;
// end inline asm
$L__BB4_97: // %__kmpc_barrier.exit.i54.i.i7210.i.i
// in Loop: Header=BB4_92 Depth=2
// %rd121 = address of the current 32-bit word in the local buffer.
add.s64 %rd121, %rd4, %rd465;
// Threads with %p86 clear publish their word to the shared slot at %rd6.
// %p86, %rd6 and %rd7 are set outside this excerpt; presumably %p86 excludes
// all but one lane per warp and %rd6/%rd7 index a per-warp transfer array --
// TODO confirm.
@%p86 bra $L__BB4_99;
// %bb.98: // %then.i72.i.i7265.i.i
// in Loop: Header=BB4_92 Depth=2
ld.local.u32 %r325, [%rd121];
st.volatile.shared.u32 [%rd6], %r325;
// Second barrier of the iteration; same three-way dispatch as above.
$L__BB4_99: // %ifcont.i55.i.i7211.i.i
// in Loop: Header=BB4_92 Depth=2
ld.shared.u32 %r326, [IsSPMDMode];
setp.ne.s32 %p87, %r326, 0;
@%p87 bra $L__BB4_102;
bra.uni $L__BB4_100;
$L__BB4_102: // %if.then2.i2.i57.i.i7213.i.i
// in Loop: Header=BB4_92 Depth=2
bar.sync 0;
bra.uni $L__BB4_104;
$L__BB4_100: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i3.i61.i.i7254.i.i
// in Loop: Header=BB4_92 Depth=2
ld.shared.u32 %r54, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p89, %r54, 0;
or.pred %p90, %p81, %p89;
@%p90 bra $L__BB4_103;
// %bb.101: // %if.then.i17.i71.i.i7264.i.i
// in Loop: Header=BB4_92 Depth=2
membar.gl;
bra.uni $L__BB4_104;
$L__BB4_103: // %if.else.i.i.i9.i64.i.i7257.i.i
// in Loop: Header=BB4_92 Depth=2
setp.eq.s32 %p91, %r54, 1;
ld.shared.u32 %r329, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p92, %r329, 0;
selp.b32 %r330, %r17, %r329, %p92;
selp.b32 %r328, %r330, 1, %p91;
mov.u32 %r327, 7;
// begin inline asm
barrier.sync %r327, %r328;
// end inline asm
$L__tmp228:
$L__BB4_104: // %__kmpc_barrier.exit18.i58.i.i7214.i.i
// in Loop: Header=BB4_92 Depth=2
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// Only threads whose %r1 is below %r505 read a published word back; the rest
// go straight to the latch.
setp.ge.u32 %p93, %r1, %r505;
$L__tmp229:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
@%p93 bra $L__BB4_106;
// %bb.105: // %then2.i60.i.i7253.i.i
// in Loop: Header=BB4_92 Depth=2
// Overwrite this thread's local word with the value read from shared [%rd7].
ld.volatile.shared.u32 %r331, [%rd7];
st.local.u32 [%rd121], %r331;
bra.uni $L__BB4_106;
$L__tmp230:
$L__BB4_107: // %_omp_reduction_inter_warp_copy_func.exit.i.i7217.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:59
setp.gt.u32 %p95, %r1, 31;
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
setp.lt.u32 %p96, %r45, 64;
or.pred %p97, %p95, %p96;
@%p97 bra $L__BB4_113;
// %bb.108: // %while.body.lr.ph.i49.i.i.i7221.i.i
// in Loop: Header=BB4_12 Depth=1
shr.u32 %r504, %r45, 6;
$L__tmp231:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
ld.local.u64 %rd468, [%rd4+16];
$L__tmp232:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
ld.local.u64 %rd467, [%rd4];
ld.local.u64 %rd466, [%rd4+8];
bra.uni $L__BB4_109;
$L__tmp233:
// Shuffle-and-reduce loop over three 64-bit values carried in %rd467, %rd466
// and %rd468. The loop header is $L__BB4_109 further down; this first block
// ($L__BB4_111) is the loop latch, laid out ahead of the header.
//
// Latch: compute the next round's parameters from the current count %r505.
//   %r58  = (%r505 + 1) >> 1   -> becomes the new %r505
//   %r504 = (%r505 + 1) >> 2   -> next shuffle distance
// The loop repeats only while the *old* %r505 was greater than 2.
$L__BB4_111: // %_omp_reduction_shuffle_and_reduce_func.exit233.i.i7240.i.i
// in Loop: Header=BB4_109 Depth=2
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
add.s32 %r346, %r505, 1;
shr.u32 %r58, %r346, 1;
shr.u32 %r504, %r346, 2;
setp.gt.u32 %p99, %r505, 2;
// Carry this round's results into the registers the header reads.
mov.u64 %rd466, %rd470;
mov.u64 %rd467, %rd469;
mov.u64 %rd468, %rd471;
mov.u32 %r505, %r58;
@%p99 bra $L__BB4_109;
bra.uni $L__BB4_112;
// Loop header: one shuffle round.
$L__BB4_109: // %while.body.i52.i.i.i7227.i.i
// Parent Loop BB4_12 Depth=1
// => This Inner Loop Header: Depth=2
// %rs8  = low 16 bits of the shuffle distance, used by the compare below.
// %r345 = the same low 16 bits sign-extended to 32 bits, used as the shfl offset.
cvt.u16.u32 %rs8, %r504;
cvt.s32.s16 %r345, %r504;
$L__tmp234:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
// Each 64-bit value is split into two 32-bit halves, shuffled down by %r345
// lanes (operand 31 is the clamp/segment word, -1 is the all-lanes member
// mask) and re-packed. Value 1 of 3: %rd467 -> %rd469.
// begin inline asm
mov.b64 {%r333,%r334}, %rd467;
// end inline asm
shfl.sync.down.b32 %r336, %r334, %r345, 31, -1;
shfl.sync.down.b32 %r335, %r333, %r345, 31, -1;
// begin inline asm
mov.b64 %rd469, {%r335,%r336};
// end inline asm
// Value 2 of 3: %rd466 -> %rd470.
// begin inline asm
mov.b64 {%r337,%r338}, %rd466;
// end inline asm
shfl.sync.down.b32 %r340, %r338, %r345, 31, -1;
shfl.sync.down.b32 %r339, %r337, %r345, 31, -1;
// begin inline asm
mov.b64 %rd470, {%r339,%r340};
// end inline asm
// Value 3 of 3: %rd468 -> %rd471.
// begin inline asm
mov.b64 {%r341,%r342}, %rd468;
// end inline asm
shfl.sync.down.b32 %r344, %r342, %r345, 31, -1;
shfl.sync.down.b32 %r343, %r341, %r345, 31, -1;
// begin inline asm
mov.b64 %rd471, {%r343,%r344};
// end inline asm
// Threads with %rs1 >= shuffle distance skip the add and keep the shuffled
// values as-is. %rs1 is defined outside this excerpt; presumably a 16-bit
// thread/lane index -- TODO confirm.
setp.ge.u16 %p98, %rs1, %rs8;
@%p98 bra $L__BB4_111;
// %bb.110: // %then.i245.i.i7249.i.i
// in Loop: Header=BB4_109 Depth=2
// Reinterpret the six 64-bit patterns as f64 and add element-wise:
// shuffled value + this thread's own value, written back over the shuffled
// registers (%rd469, %rd470, %rd471).
mov.b64 %fd187, %rd468;
mov.b64 %fd188, %rd466;
mov.b64 %fd189, %rd467;
mov.b64 %fd190, %rd471;
mov.b64 %fd191, %rd470;
mov.b64 %fd192, %rd469;
$L__tmp235:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd193, %fd192, %fd189;
$L__tmp236:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd469, %fd193;
$L__tmp237:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd194, %fd191, %fd188;
$L__tmp238:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
mov.b64 %rd470, %fd194;
$L__tmp239:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
add.f64 %fd195, %fd190, %fd187;
mov.b64 %rd471, %fd195;
bra.uni $L__BB4_111;
$L__tmp240:
$L__BB4_112: // %if.end29.i.i.loopexit.i7248.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
mov.b64 %fd263, %rd470;
mov.b64 %fd262, %rd469;
$L__tmp241:
.loc 1 788 59 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:59
st.local.u64 [%rd4+16], %rd471;
$L__tmp242:
// Final step of the reduction on this path, followed by the copy of the
// reduced value to global memory (source line 795).
//
// Only the thread with %r1 == 0 folds its partial (%fd262, %fd263, %fd264)
// into ratioGradRef_local_shared; every other thread branches to $L__BB4_116.
//
// NOTE(review): on this path there is no bar.sync between the shared
// read-modify-write in $L__BB4_115 and the shared loads in $L__BB4_118 below,
// whereas the other predecessor of $L__BB4_118 ($L__BB4_73) executes
// barriers first. If more than one thread can take this path, threads that
// skipped the accumulation could read and store a stale value -- confirm
// against the code outside this excerpt whether that can happen.
$L__BB4_113: // %if.end29.i.i.i7184.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
setp.ne.s32 %p100, %r1, 0;
$L__tmp243:
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
@%p100 bra $L__BB4_116;
// %bb.114: // %if.end29.i.i..omp.reduction.then_crit_edge.i7189.i.i
// in Loop: Header=BB4_12 Depth=1
$L__tmp244:
.loc 3 112 5 // qmcpack/src/Containers/OhmmsPETE/TinyVectorOps.h:112:5
// Third component is re-read from the local slot at [%rd4+16]; the first two
// (%fd262, %fd263) arrive in registers from the predecessor blocks.
ld.local.f64 %fd264, [%rd4+16];
$L__BB4_115: // %.omp.reduction.then.i7191.i.i
// in Loop: Header=BB4_12 Depth=1
$L__tmp245:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// Plain (non-atomic) load / add / store on shared memory, one slot at a time:
// slot +0 += %fd262
ld.shared.f64 %fd196, [ratioGradRef_local_shared];
add.f64 %fd197, %fd196, %fd262;
st.shared.f64 [ratioGradRef_local_shared], %fd197;
$L__tmp246:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// slot +8 += %fd263
ld.shared.f64 %fd198, [ratioGradRef_local_shared+8];
add.f64 %fd199, %fd198, %fd263;
st.shared.f64 [ratioGradRef_local_shared+8], %fd199;
$L__tmp247:
.loc 4 569 25 // qmcpack/src/Containers/PETE/OperatorTags.h:569:25
// slot +16 += %fd264
ld.shared.f64 %fd200, [ratioGradRef_local_shared+16];
add.f64 %fd201, %fd200, %fd264;
st.shared.f64 [ratioGradRef_local_shared+16], %fd201;
$L__tmp248:
$L__BB4_116: // %_ZN12_GLOBAL__N_115invokeMicrotaskEiiPvPS0_l.exit4408.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 788 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:788:1
// Write %r492 back into TeamState field _0 (value set outside this excerpt).
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r492;
// Join point: reached by fall-through from $L__BB4_116 and by the branch at
// the end of $L__BB4_73.
$L__BB4_118: // %__kmpc_parallel_51.exit.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 795 29 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:795:29
ld.shared.u32 %r352, [iw_shared5_$_0];
.loc 1 795 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:795:7
// Destination = %rd3 + iw * 24 bytes (24 = three 8-byte elements per entry).
mul.wide.u32 %rd352, %r352, 24;
add.s64 %rd353, %rd3, %rd352;
.loc 1 795 33 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:795:33
// Copy the three 64-bit slots of ratioGradRef_local_shared to global memory,
// highest offset first. No predicate guards these stores, so every thread
// that reaches this block performs them.
ld.shared.u64 %rd354, [ratioGradRef_local_shared+16];
st.global.u64 [%rd353+16], %rd354;
ld.shared.u64 %rd355, [ratioGradRef_local_shared+8];
st.global.u64 [%rd353+8], %rd355;
ld.shared.u64 %rd356, [ratioGradRef_local_shared];
st.global.u64 [%rd353], %rd356;
.loc 1 797 17 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:797:17
@%p6 bra $L__BB4_120;
// %bb.119: // %region.guarded22.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 17 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:17
mov.u64 %rd357, 0;
.loc 1 797 17 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:797:17
st.shared.u64 [c_ratio_shared8_$_0], %rd357;
$L__BB4_120: // %region.barrier20.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
ld.shared.u32 %r353, [IsSPMDMode];
setp.eq.s32 %p102, %r353, 0;
selp.b32 %r354, -32, 0, %p102;
add.s32 %r355, %r354, %r13;
setp.lt.u32 %p103, %r355, 32;
and.b32 %r356, %r355, -32;
selp.b32 %r60, 1, %r356, %p103;
selp.b32 %r357, %r60, %r13, %p102;
setp.eq.s32 %p104, %r357, %r355;
selp.b32 %r61, 0, %r357, %p104;
@%p102 bra $L__BB4_180;
$L__tmp249:
// %bb.121: // %if.then32.i47.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
setp.ne.s32 %p105, %r1, 0;
$L__tmp250:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
bar.sync 0;
@%p105 bra $L__BB4_123;
// %bb.122: // %if.else.i.i.i67.i
// in Loop: Header=BB4_12 Depth=1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r61;
mov.u32 %r358, 1;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r358;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r358;
$L__BB4_123: // %_ZN4ompx5state9ValueRAIIINS0_5ValueIjLNS0_9ValueKindE1EEEjEC1ERS4_jjbP7IdentTyb.exit.i49.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
bar.sync 0;
add.s32 %r359, %r61, -1;
setp.lt.u32 %p106, %r359, %r1;
@%p106 bra $L__BB4_177;
// %bb.124: // %if.then40.i55.i
// in Loop: Header=BB4_12 Depth=1
$L__tmp251:
.loc 1 799 32 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:32
ld.shared.u64 %rd138, [psiMinv_cols6_shared_$_0];
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
setp.eq.s64 %p107, %rd138, 0;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p107 bra $L__BB4_177;
// %bb.125: // %omp.precond.then.i53.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r62, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p108, %r62, 1;
mov.u32 %r506, 1;
@%p108 bra $L__BB4_128;
// %bb.126: // %cond.false.i.i.i68.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r506, [_ZN4ompx5state9TeamStateE_$_2];
setp.ne.s32 %p109, %r506, 0;
@%p109 bra $L__BB4_128;
// %bb.127: // %cond.false.i.i.i.i70.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r361, [IsSPMDMode];
setp.eq.s32 %p110, %r361, 0;
selp.b32 %r362, -32, 0, %p110;
add.s32 %r506, %r362, %r13;
$L__BB4_128: // %__kmpc_for_static_init_8u.exit.i55.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
add.s64 %rd139, %rd138, -1;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
ld.shared.u32 %r63, [_ZN4ompx5state9TeamStateE_$_1];
setp.eq.s32 %p111, %r62, 0;
setp.eq.s32 %p112, %r63, %r62;
selp.b32 %r363, %r1, 0, %p112;
selp.b32 %r364, 0, %r363, %p111;
cvt.s64.s32 %rd475, %r364;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
setp.lt.u64 %p113, %rd139, %rd475;
mov.f64 %fd283, 0d0000000000000000;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p113 bra $L__BB4_133;
// %bb.129: // %omp.inner.for.cond.preheader.lr.ph.i.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
cvt.s64.s32 %rd141, %r506;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
add.s64 %rd474, %rd475, 1;
$L__tmp252:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
ld.shared.u64 %rd358, [psiMinv_temp_list_devptr5_shared_$_0];
ld.shared.u32 %r365, [iw_shared5_$_0];
mul.wide.u32 %rd359, %r365, 8;
add.s64 %rd360, %rd358, %rd359;
ld.u64 %rd361, [%rd360];
ld.shared.u32 %r366, [WorkingIndex7_shared7_$_0];
ld.shared.u64 %rd362, [psiV_temp_list_ptr3_shared4_$_0];
add.s64 %rd363, %rd362, %rd359;
ld.u64 %rd364, [%rd363];
$L__tmp253:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
shl.b64 %rd365, %rd475, 3;
add.s64 %rd473, %rd364, %rd365;
shl.b64 %rd144, %rd141, 3;
shl.b64 %rd145, %rd138, 3;
mul.lo.s64 %rd366, %rd145, %rd475;
mul.wide.s32 %rd367, %r366, 8;
add.s64 %rd368, %rd366, %rd367;
add.s64 %rd472, %rd361, %rd368;
mul.lo.s64 %rd147, %rd145, %rd141;
mov.f64 %fd283, 0d0000000000000000;
mov.u64 %rd476, %rd475;
// Two-level loop that accumulates a sum of products into %fd283 (source lines
// 799-802). The outer loop ($L__BB4_130) walks chunks; the inner loop
// ($L__BB4_131) walks the indices inside one chunk.
// Outer-loop state: %rd476 = first index of the chunk, %rd474 = one past its
// last index, %rd472 / %rd473 = addresses of the two operands for the chunk.
$L__BB4_130: // %omp.inner.for.cond.preheader.i.i.i
// Parent Loop BB4_12 Depth=1
// => This Loop Header: Depth=2
// Child Loop BB4_131 Depth 3
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
// Seed the inner-loop cursors from the chunk state.
mov.u64 %rd477, %rd472;
mov.u64 %rd478, %rd473;
mov.u64 %rd479, %rd476;
$L__BB4_131: // %omp.inner.for.body.i65.i.i
// Parent Loop BB4_12 Depth=1
// Parent Loop BB4_130 Depth=2
// => This Inner Loop Header: Depth=3
.loc 1 802 20 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:802:20
// Both loads use generic addressing (no state-space qualifier).
ld.f64 %fd204, [%rd477];
.loc 1 802 72 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:802:72
ld.f64 %fd205, [%rd478];
.loc 1 802 17 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:802:17
// %fd283 += [%rd478] * [%rd477], fused with round-to-nearest.
fma.rn.f64 %fd283, %fd205, %fd204, %fd283;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
// Advance: index by 1, second operand by one f64 (8 bytes), first operand by
// %rd145 bytes (a stride computed before the loop).
add.s64 %rd479, %rd479, 1;
add.s64 %rd478, %rd478, 8;
add.s64 %rd477, %rd477, %rd145;
setp.lt.u64 %p114, %rd479, %rd474;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p114 bra $L__BB4_131;
// %bb.132: // %omp.dispatch.inc.i.i.i
// in Loop: Header=BB4_130 Depth=2
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
// Move to the next chunk: lower bound += %rd141; upper bound is advanced by
// %rd141 and clamped to %rd139 with an unsigned min.
add.s64 %rd476, %rd476, %rd141;
add.s64 %rd369, %rd475, %rd141;
.loc 1 799 12 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:12
min.u64 %rd475, %rd369, %rd139;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
add.s64 %rd474, %rd475, 1;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
// Advance both operand base addresses by their per-chunk byte strides.
add.s64 %rd473, %rd473, %rd144;
add.s64 %rd472, %rd472, %rd147;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
// Continue while the new lower bound is still below the clamped upper bound + 1.
setp.lt.u64 %p115, %rd476, %rd474;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p115 bra $L__BB4_130;
$L__BB4_133: // %omp.dispatch.end.i.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u32 %r507, 1;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p108 bra $L__BB4_136;
// %bb.134: // %cond.false.i.i13.i.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r507, [_ZN4ompx5state9TeamStateE_$_2];
setp.ne.s32 %p119, %r507, 0;
@%p119 bra $L__BB4_136;
// %bb.135: // %cond.false.i.i.i14.i.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r368, [IsSPMDMode];
setp.eq.s32 %p120, %r368, 0;
selp.b32 %r369, -32, 0, %p120;
add.s32 %r507, %r369, %r13;
$L__BB4_136: // %omp_get_num_threads.exit.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
setp.eq.s32 %p122, %r507, 1;
mov.pred %p207, 0;
@%p122 bra $L__BB4_175;
// %bb.137: // %if.end5.i.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
add.s32 %r71, %r507, 31;
shr.u32 %r511, %r71, 5;
and.b32 %r509, %r507, 31;
setp.ne.s32 %p123, %r509, 0;
add.s32 %r370, %r511, -1;
setp.ge.u32 %p124, %r15, %r370;
and.pred %p125, %p123, %p124;
@%p125 bra $L__BB4_139;
// Fully unrolled shuffle reduction of the f64 in %fd283 (source line 798).
// Five rounds with shuffle distances 16, 8, 4, 2, 1. In every round the
// running sum is split into two 32-bit halves, both halves are fetched from
// the lane that many positions further down (clamp word 31, member mask -1 =
// all 32 lanes), re-packed, and added to the running sum. The result is left
// in %fd283 and control continues at $L__BB4_142.
// %bb.138: // %for.body.i.i.preheader.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
$L__tmp254:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %rd372, %fd283;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// Round 1: distance 16.
// begin inline asm
mov.b64 {%r376,%r377}, %rd372;
// end inline asm
shfl.sync.down.b32 %r379, %r377, 16, 31, -1;
shfl.sync.down.b32 %r378, %r376, 16, 31, -1;
// begin inline asm
mov.b64 %rd373, {%r378,%r379};
// end inline asm
$L__tmp255:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %fd208, %rd373;
$L__tmp256:
add.f64 %fd209, %fd283, %fd208;
$L__tmp257:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd374, %fd209;
// Round 2: distance 8.
// begin inline asm
mov.b64 {%r380,%r381}, %rd374;
// end inline asm
shfl.sync.down.b32 %r383, %r381, 8, 31, -1;
shfl.sync.down.b32 %r382, %r380, 8, 31, -1;
// begin inline asm
mov.b64 %rd375, {%r382,%r383};
// end inline asm
mov.b64 %fd210, %rd375;
$L__tmp258:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd211, %fd209, %fd210;
$L__tmp259:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd376, %fd211;
// Round 3: distance 4.
// begin inline asm
mov.b64 {%r384,%r385}, %rd376;
// end inline asm
shfl.sync.down.b32 %r387, %r385, 4, 31, -1;
shfl.sync.down.b32 %r386, %r384, 4, 31, -1;
// begin inline asm
mov.b64 %rd377, {%r386,%r387};
// end inline asm
mov.b64 %fd212, %rd377;
$L__tmp260:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd213, %fd211, %fd212;
$L__tmp261:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd378, %fd213;
// Round 4: distance 2.
// begin inline asm
mov.b64 {%r388,%r389}, %rd378;
// end inline asm
shfl.sync.down.b32 %r391, %r389, 2, 31, -1;
shfl.sync.down.b32 %r390, %r388, 2, 31, -1;
// begin inline asm
mov.b64 %rd379, {%r390,%r391};
// end inline asm
mov.b64 %fd214, %rd379;
$L__tmp262:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd215, %fd213, %fd214;
$L__tmp263:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
mov.b64 %rd380, %fd215;
// Round 5: distance 1.
// begin inline asm
mov.b64 {%r392,%r393}, %rd380;
// end inline asm
shfl.sync.down.b32 %r395, %r393, 1, 31, -1;
shfl.sync.down.b32 %r394, %r392, 1, 31, -1;
// begin inline asm
mov.b64 %rd381, {%r394,%r395};
// end inline asm
mov.b64 %fd216, %rd381;
$L__tmp264:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
// Final sum of the five rounds lands back in %fd283.
add.f64 %fd283, %fd215, %fd216;
$L__tmp265:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
bra.uni $L__BB4_142;
$L__tmp266:
$L__BB4_180: // %if.end45.i74.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.gt.u32 %p161, %r60, 1;
@%p161 bra $L__BB4_223;
bra.uni $L__BB4_181;
$L__BB4_223: // %__kmpc_begin_sharing_variables.exit.i78.i
// in Loop: Header=BB4_12 Depth=1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r61;
mov.u32 %r486, 1;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r486;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r486;
mov.u32 %r484, 8;
// begin inline asm
barrier.sync %r484;
// end inline asm
// begin inline asm
barrier.sync %r484;
// end inline asm
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r492;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r492;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r486;
bra.uni $L__BB4_224;
$L__BB4_181: // %if.else.i.i4468.i76.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u32 %r424, 1;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r424;
$L__tmp267:
.loc 1 799 32 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:32
ld.shared.u64 %rd167, [psiMinv_cols6_shared_$_0];
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
setp.eq.s64 %p162, %rd167, 0;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p162 bra $L__BB4_222;
// %bb.182: // %omp.precond.then.i5350.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
add.s64 %rd168, %rd167, -1;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
ld.shared.u32 %r425, [_ZN4ompx5state9TeamStateE_$_1];
ld.shared.u32 %r426, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p163, %r426, 0;
selp.b32 %r427, %r17, %r426, %p163;
setp.eq.s32 %p164, %r425, 1;
selp.b32 %r428, %r1, 0, %p164;
cvt.s64.s32 %rd483, %r428;
cvt.s64.s32 %rd170, %r427;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
setp.lt.u64 %p165, %rd168, %rd483;
mov.f64 %fd293, 0d0000000000000000;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p165 bra $L__BB4_187;
// %bb.183: // %omp.inner.for.cond.preheader.lr.ph.i5361.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
add.s64 %rd482, %rd483, 1;
$L__tmp268:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
ld.shared.u64 %rd390, [psiMinv_temp_list_devptr5_shared_$_0];
ld.shared.u32 %r429, [iw_shared5_$_0];
mul.wide.u32 %rd391, %r429, 8;
add.s64 %rd392, %rd390, %rd391;
ld.u64 %rd393, [%rd392];
ld.shared.u32 %r430, [WorkingIndex7_shared7_$_0];
ld.shared.u64 %rd394, [psiV_temp_list_ptr3_shared4_$_0];
add.s64 %rd395, %rd394, %rd391;
ld.u64 %rd396, [%rd395];
$L__tmp269:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
shl.b64 %rd397, %rd483, 3;
add.s64 %rd481, %rd396, %rd397;
shl.b64 %rd173, %rd170, 3;
shl.b64 %rd174, %rd167, 3;
mul.lo.s64 %rd398, %rd174, %rd483;
mul.wide.s32 %rd399, %r430, 8;
add.s64 %rd400, %rd398, %rd399;
add.s64 %rd480, %rd393, %rd400;
mul.lo.s64 %rd176, %rd174, %rd170;
mov.f64 %fd293, 0d0000000000000000;
mov.u64 %rd484, %rd483;
// Two-level loop that accumulates a sum of products into %fd293 (source lines
// 799-802). The outer loop ($L__BB4_184) walks chunks; the inner loop
// ($L__BB4_185) walks the indices inside one chunk.
// Outer-loop state: %rd484 = first index of the chunk, %rd482 = one past its
// last index, %rd480 / %rd481 = addresses of the two operands for the chunk.
$L__BB4_184: // %omp.inner.for.cond.preheader.i5368.i.i
// Parent Loop BB4_12 Depth=1
// => This Loop Header: Depth=2
// Child Loop BB4_185 Depth 3
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
// Seed the inner-loop cursors from the chunk state.
mov.u64 %rd485, %rd480;
mov.u64 %rd486, %rd481;
mov.u64 %rd487, %rd484;
$L__BB4_185: // %omp.inner.for.body.i5373.i.i
// Parent Loop BB4_12 Depth=1
// Parent Loop BB4_184 Depth=2
// => This Inner Loop Header: Depth=3
.loc 1 802 20 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:802:20
// Both loads use generic addressing (no state-space qualifier).
ld.f64 %fd223, [%rd485];
.loc 1 802 72 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:802:72
ld.f64 %fd224, [%rd486];
.loc 1 802 17 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:802:17
// %fd293 += [%rd486] * [%rd485], fused with round-to-nearest.
fma.rn.f64 %fd293, %fd224, %fd223, %fd293;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
// Advance: index by 1, second operand by one f64 (8 bytes), first operand by
// %rd174 bytes (a stride computed before the loop).
add.s64 %rd487, %rd487, 1;
add.s64 %rd486, %rd486, 8;
add.s64 %rd485, %rd485, %rd174;
setp.lt.u64 %p166, %rd487, %rd482;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p166 bra $L__BB4_185;
// %bb.186: // %omp.dispatch.inc.i5383.i.i
// in Loop: Header=BB4_184 Depth=2
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
// Move to the next chunk: lower bound += %rd170; upper bound is advanced by
// %rd170 and clamped to %rd168 with an unsigned min.
add.s64 %rd484, %rd484, %rd170;
add.s64 %rd401, %rd483, %rd170;
.loc 1 799 12 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:12
min.u64 %rd483, %rd401, %rd168;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
add.s64 %rd482, %rd483, 1;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
// Advance both operand base addresses by their per-chunk byte strides.
add.s64 %rd481, %rd481, %rd173;
add.s64 %rd480, %rd480, %rd176;
.loc 1 799 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:799:7
// Continue while the new lower bound is still below the clamped upper bound + 1.
setp.lt.u64 %p167, %rd484, %rd482;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
@%p167 bra $L__BB4_184;
$L__BB4_187: // %omp.dispatch.end.i5389.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
cvt.u32.u64 %r431, %rd170;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.eq.s32 %p169, %r431, 1;
mov.pred %p208, %p168;
@%p169 bra $L__BB4_220;
// %bb.188: // %if.end5.i.i.i5399.i.i
// in Loop: Header=BB4_12 Depth=1
add.s32 %r89, %r431, 31;
shr.u32 %r515, %r89, 5;
and.b32 %r513, %r431, 31;
setp.ne.s32 %p170, %r513, 0;
add.s32 %r433, %r515, -1;
setp.ge.u32 %p171, %r15, %r433;
and.pred %p172, %p170, %p171;
@%p172 bra $L__BB4_190;
// %bb.189: // %for.body.i.i.preheader.i.i5540.i.i
// in Loop: Header=BB4_12 Depth=1
// Fully unrolled shuffle-down reduction of the f64 accumulator %fd293.
// Five steps with shuffle distances 16, 8, 4, 2, 1. Each step:
//   1. reinterprets the running f64 as a 64-bit integer and splits it into
//      two 32-bit halves (shfl operates on 32-bit registers),
//   2. shuffles each half down by the step distance (clamp operand 31,
//      member mask -1, i.e. all 32 mask bits set),
//   3. repacks the two received halves into an f64 and adds it to the
//      running value.
// Unlike the looped variant at $L__BB4_192, no selp is applied here: every
// lane always adds the received value to its own.
$L__tmp270:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %rd404, %fd293;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// Step 1: distance 16.
// begin inline asm
mov.b64 {%r439,%r440}, %rd404;
// end inline asm
shfl.sync.down.b32 %r442, %r440, 16, 31, -1;
shfl.sync.down.b32 %r441, %r439, 16, 31, -1;
// begin inline asm
mov.b64 %rd405, {%r441,%r442};
// end inline asm
$L__tmp271:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %fd227, %rd405;
$L__tmp272:
add.f64 %fd228, %fd293, %fd227;
$L__tmp273:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// Step 2: distance 8.
mov.b64 %rd406, %fd228;
// begin inline asm
mov.b64 {%r443,%r444}, %rd406;
// end inline asm
shfl.sync.down.b32 %r446, %r444, 8, 31, -1;
shfl.sync.down.b32 %r445, %r443, 8, 31, -1;
// begin inline asm
mov.b64 %rd407, {%r445,%r446};
// end inline asm
mov.b64 %fd229, %rd407;
$L__tmp274:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd230, %fd228, %fd229;
$L__tmp275:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// Step 3: distance 4.
mov.b64 %rd408, %fd230;
// begin inline asm
mov.b64 {%r447,%r448}, %rd408;
// end inline asm
shfl.sync.down.b32 %r450, %r448, 4, 31, -1;
shfl.sync.down.b32 %r449, %r447, 4, 31, -1;
// begin inline asm
mov.b64 %rd409, {%r449,%r450};
// end inline asm
mov.b64 %fd231, %rd409;
$L__tmp276:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd232, %fd230, %fd231;
$L__tmp277:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// Step 4: distance 2.
mov.b64 %rd410, %fd232;
// begin inline asm
mov.b64 {%r451,%r452}, %rd410;
// end inline asm
shfl.sync.down.b32 %r454, %r452, 2, 31, -1;
shfl.sync.down.b32 %r453, %r451, 2, 31, -1;
// begin inline asm
mov.b64 %rd411, {%r453,%r454};
// end inline asm
mov.b64 %fd233, %rd411;
$L__tmp278:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd234, %fd232, %fd233;
$L__tmp279:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// Step 5: distance 1.
mov.b64 %rd412, %fd234;
// begin inline asm
mov.b64 {%r455,%r456}, %rd412;
// end inline asm
shfl.sync.down.b32 %r458, %r456, 1, 31, -1;
shfl.sync.down.b32 %r457, %r455, 1, 31, -1;
// begin inline asm
mov.b64 %rd413, {%r457,%r458};
// end inline asm
mov.b64 %fd235, %rd413;
$L__tmp280:
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
// The final sum is written back into %fd293, the register the later
// inter-warp stage reads.
add.f64 %fd293, %fd234, %fd235;
$L__tmp281:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
bra.uni $L__BB4_193;
$L__BB4_139: // %if.then15.i.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
// Variable-width shuffle-down reduction of the f64 accumulator %fd283.
// %r509 is the remaining width and %r508 the shuffle distance; %r509 is
// initialised outside this excerpt. With fewer than 2 values left there is
// nothing to combine, so the loop is skipped.
setp.lt.u32 %p126, %r509, 2;
@%p126 bra $L__BB4_142;
// %bb.140: // %while.body.lr.ph.i.i.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
// Initial distance = width / 2.
shr.u32 %r508, %r509, 1;
$L__BB4_141: // %while.body.i.i.i.i.i.i
// Parent Loop BB4_12 Depth=1
// => This Inner Loop Header: Depth=2
cvt.u16.u32 %rs9, %r508;
$L__tmp282:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %rd370, %fd283;
$L__tmp283:
// %p127 = (%rs2 < distance). %rs2 is set outside this excerpt; presumably
// this thread's lane index -- TODO confirm.
setp.lt.u16 %p127, %rs2, %rs9;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// Split the f64 into two 32-bit halves, shuffle each half down by the
// current distance, then repack.
// begin inline asm
mov.b64 {%r371,%r372}, %rd370;
// end inline asm
shfl.sync.down.b32 %r374, %r372, %r508, 31, -1;
shfl.sync.down.b32 %r373, %r371, %r508, 31, -1;
// begin inline asm
mov.b64 %rd371, {%r373,%r374};
// end inline asm
mov.b64 %fd206, %rd371;
// When %p127 holds, keep the local value and add the received one;
// otherwise the local value is replaced by -0.0 (0d8000000000000000), so
// the result is just the received value.
selp.f64 %fd207, %fd283, 0d8000000000000000, %p127;
add.f64 %fd283, %fd207, %fd206;
$L__tmp284:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
// Next width = (width + 1) / 2, next distance = (width + 1) / 4.
// The loop repeats while the width before this update was greater than 2.
add.s32 %r375, %r509, 1;
shr.u32 %r77, %r375, 1;
shr.u32 %r508, %r375, 2;
setp.gt.u32 %p128, %r509, 2;
mov.u32 %r509, %r77;
@%p128 bra $L__BB4_141;
$L__BB4_142: // %if.end22.i.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
setp.eq.s32 %p118, %r1, %r14;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
and.pred %p2, %p118, %p111;
selp.b32 %r67, 0, %r1, %p2;
setp.lt.u32 %p129, %r507, 33;
@%p129 bra $L__BB4_174;
// %bb.143: // %if.then25.i.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
$L__tmp285:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
ld.shared.u32 %r396, [IsSPMDMode];
setp.ne.s32 %p130, %r396, 0;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
@%p130 bra $L__BB4_146;
bra.uni $L__BB4_144;
$L__BB4_146: // %if.then2.i.i93.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
bra.uni $L__BB4_148;
$L__tmp286:
$L__BB4_190: // %if.then15.i.i.i5411.i.i
// in Loop: Header=BB4_12 Depth=1
// Variable-width shuffle-down reduction of the f64 accumulator %fd293.
// This is the looped counterpart of the unrolled 16/8/4/2/1 sequence in
// %bb.189. %r513 is the remaining width and %r512 the shuffle distance.
// With fewer than 2 values left the loop is skipped.
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.lt.u32 %p173, %r513, 2;
@%p173 bra $L__BB4_193;
// %bb.191: // %while.body.lr.ph.i.i.i.i5413.i.i
// in Loop: Header=BB4_12 Depth=1
// Initial distance = width / 2.
shr.u32 %r512, %r513, 1;
$L__BB4_192: // %while.body.i.i.i.i5416.i.i
// Parent Loop BB4_12 Depth=1
// => This Inner Loop Header: Depth=2
cvt.u16.u32 %rs11, %r512;
$L__tmp287:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %rd402, %fd293;
$L__tmp288:
// %p174 = (%rs2 < distance). %rs2 is set outside this excerpt; presumably
// this thread's lane index -- TODO confirm.
setp.lt.u16 %p174, %rs2, %rs11;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// Split the f64 into two 32-bit halves, shuffle each half down by the
// current distance, then repack.
// begin inline asm
mov.b64 {%r434,%r435}, %rd402;
// end inline asm
shfl.sync.down.b32 %r437, %r435, %r512, 31, -1;
shfl.sync.down.b32 %r436, %r434, %r512, 31, -1;
// begin inline asm
mov.b64 %rd403, {%r436,%r437};
// end inline asm
mov.b64 %fd225, %rd403;
// When %p174 holds, keep the local value and add the received one;
// otherwise the local value is replaced by -0.0 (0d8000000000000000), so
// the result is just the received value.
selp.f64 %fd226, %fd293, 0d8000000000000000, %p174;
add.f64 %fd293, %fd226, %fd225;
$L__tmp289:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
// Next width = (width + 1) / 2, next distance = (width + 1) / 4.
// The loop repeats while the width before this update was greater than 2.
add.s32 %r438, %r513, 1;
shr.u32 %r95, %r438, 1;
shr.u32 %r512, %r438, 2;
setp.gt.u32 %p175, %r513, 2;
mov.u32 %r513, %r95;
@%p175 bra $L__BB4_192;
$L__BB4_193: // %if.end22.i.i.i5429.i.i
// in Loop: Header=BB4_12 Depth=1
setp.lt.u32 %p176, %r431, 33;
mov.pred %p208, %p6;
@%p176 bra $L__BB4_220;
// %bb.194: // %if.then25.i.i.i5440.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:1
mov.u32 %r460, 7;
$L__tmp290:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// begin inline asm
barrier.sync %r460, %r431;
// end inline asm
mov.b64 %rd193, %fd293;
@%p86 bra $L__BB4_196;
// %bb.195: // %then.i143.i.i5530.i.i
// in Loop: Header=BB4_12 Depth=1
cvt.u32.u64 %r97, %rd193;
st.volatile.shared.u32 [%rd6], %r97;
$L__BB4_196: // %ifcont.i95.i.i5449.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r462, [IsSPMDMode];
setp.ne.s32 %p178, %r462, 0;
@%p178 bra $L__BB4_199;
bra.uni $L__BB4_197;
$L__BB4_199: // %if.then2.i2.i97.i.i5451.i.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
bra.uni $L__BB4_201;
$L__BB4_144: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i.i144.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
@!%p2 bra $L__BB4_147;
bra.uni $L__BB4_145;
$L__BB4_145: // %if.then.i.i154.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
membar.gl;
bra.uni $L__BB4_148;
$L__BB4_197: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i3.i132.i.i5519.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r98, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p180, %r98, 0;
or.pred %p181, %p81, %p180;
@%p181 bra $L__BB4_200;
// %bb.198: // %if.then.i17.i142.i.i5529.i.i
// in Loop: Header=BB4_12 Depth=1
membar.gl;
bra.uni $L__BB4_201;
$L__tmp291:
$L__BB4_147: // %if.else.i.i.i.i147.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.eq.s32 %p131, %r62, 1;
$L__tmp292:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
ld.shared.u32 %r399, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p132, %r399, 0;
selp.b32 %r400, %r17, %r399, %p132;
selp.b32 %r398, %r400, 1, %p131;
mov.u32 %r397, 7;
// begin inline asm
barrier.sync %r397, %r398;
// end inline asm
$L__BB4_148: // %__kmpc_barrier.exit.i94.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
mov.b64 %rd164, %fd283;
@%p86 bra $L__BB4_150;
// %bb.149: // %then.i143.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
cvt.u32.u64 %r79, %rd164;
st.volatile.shared.u32 [%rd6], %r79;
$L__BB4_150: // %ifcont.i95.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r401, [IsSPMDMode];
setp.ne.s32 %p134, %r401, 0;
@%p134 bra $L__BB4_153;
bra.uni $L__BB4_151;
$L__BB4_153: // %if.then2.i2.i97.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
bra.uni $L__BB4_155;
$L__BB4_151: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i3.i132.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r80, [_ZN4ompx5state9TeamStateE_$_0];
setp.ne.s32 %p136, %r80, 0;
or.pred %p137, %p81, %p136;
@%p137 bra $L__BB4_154;
// %bb.152: // %if.then.i17.i142.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
membar.gl;
bra.uni $L__BB4_155;
$L__BB4_154: // %if.else.i.i.i9.i135.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
setp.eq.s32 %p138, %r80, 1;
ld.shared.u32 %r404, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p139, %r404, 0;
selp.b32 %r405, %r17, %r404, %p139;
selp.b32 %r403, %r405, 1, %p138;
mov.u32 %r402, 7;
// begin inline asm
barrier.sync %r402, %r403;
// end inline asm
$L__tmp293:
$L__BB4_155: // %__kmpc_barrier.exit18.i98.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.ge.u32 %p140, %r1, %r511;
$L__tmp294:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
@%p140 bra $L__BB4_157;
// %bb.156: // %then2.i131.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
and.b64 %rd165, %rd164, -4294967296;
ld.volatile.shared.u32 %rd382, [%rd7];
or.b64 %rd383, %rd165, %rd382;
mov.b64 %fd283, %rd383;
$L__BB4_157: // %ifcont4.i99.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r406, [IsSPMDMode];
setp.eq.s32 %p141, %r406, 0;
@%p141 bra $L__BB4_159;
// %bb.158: // %if.then2.i.1.i101.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
bra.uni $L__BB4_162;
$L__BB4_159: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i.1.i120.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r81, [_ZN4ompx5state9TeamStateE_$_0];
setp.eq.s32 %p143, %r81, 0;
and.pred %p144, %p118, %p143;
@%p144 bra $L__BB4_161;
bra.uni $L__BB4_160;
$L__BB4_161: // %if.then.i.1.i130.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
membar.gl;
bra.uni $L__BB4_162;
$L__BB4_160: // %if.else.i.i.i.1.i123.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
setp.eq.s32 %p145, %r81, 1;
ld.shared.u32 %r409, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p146, %r409, 0;
selp.b32 %r410, %r17, %r409, %p146;
selp.b32 %r408, %r410, 1, %p145;
mov.u32 %r407, 7;
// begin inline asm
barrier.sync %r407, %r408;
// end inline asm
$L__BB4_162: // %__kmpc_barrier.exit.1.i102.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
mov.b64 %rd166, %fd283;
@%p86 bra $L__BB4_164;
// %bb.163: // %then.1.i119.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
{ .reg .b32 tmp; mov.b64 {tmp, %r82}, %rd166; }
st.volatile.shared.u32 [%rd6], %r82;
$L__BB4_164: // %ifcont.1.i103.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r411, [IsSPMDMode];
setp.eq.s32 %p148, %r411, 0;
@%p148 bra $L__BB4_166;
// %bb.165: // %if.then2.i2.1.i105.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
bra.uni $L__BB4_169;
$L__BB4_166: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i3.1.i108.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r83, [_ZN4ompx5state9TeamStateE_$_0];
setp.eq.s32 %p150, %r83, 0;
and.pred %p151, %p118, %p150;
@%p151 bra $L__BB4_168;
bra.uni $L__BB4_167;
$L__BB4_168: // %if.then.i17.1.i118.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
membar.gl;
bra.uni $L__BB4_169;
$L__BB4_167: // %if.else.i.i.i9.1.i111.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
setp.eq.s32 %p152, %r83, 1;
ld.shared.u32 %r414, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p153, %r414, 0;
selp.b32 %r415, %r17, %r414, %p153;
selp.b32 %r413, %r415, 1, %p152;
mov.u32 %r412, 7;
// begin inline asm
barrier.sync %r412, %r413;
// end inline asm
$L__BB4_169: // %__kmpc_barrier.exit18.1.i106.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
@%p140 bra $L__BB4_171;
$L__tmp295:
// %bb.170: // %_omp_reduction_inter_warp_copy_func.exit.sink.split.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
ld.volatile.shared.u32 %rd384, [%rd7];
shl.b64 %rd385, %rd384, 32;
and.b64 %rd386, %rd166, 4294967295;
or.b64 %rd387, %rd385, %rd386;
mov.b64 %fd283, %rd387;
$L__tmp296:
$L__BB4_171: // %_omp_reduction_inter_warp_copy_func.exit.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
setp.gt.u32 %p155, %r1, 31;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.lt.u32 %p156, %r71, 64;
or.pred %p157, %p155, %p156;
@%p157 bra $L__BB4_174;
// %bb.172: // %while.body.lr.ph.i49.i.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
shr.u32 %r510, %r71, 6;
cvt.u16.u32 %rs4, %r67;
$L__BB4_173: // %while.body.i52.i.i.i.i.i
// Parent Loop BB4_12 Depth=1
// => This Inner Loop Header: Depth=2
cvt.u16.u32 %rs10, %r510;
$L__tmp297:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %rd388, %fd283;
cvt.s32.s16 %r420, %r510;
setp.lt.u16 %p158, %rs4, %rs10;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// begin inline asm
mov.b64 {%r416,%r417}, %rd388;
// end inline asm
shfl.sync.down.b32 %r419, %r417, %r420, 31, -1;
shfl.sync.down.b32 %r418, %r416, %r420, 31, -1;
// begin inline asm
mov.b64 %rd389, {%r418,%r419};
// end inline asm
mov.b64 %fd217, %rd389;
selp.f64 %fd218, %fd283, 0d8000000000000000, %p158;
add.f64 %fd283, %fd218, %fd217;
$L__tmp298:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
add.s32 %r421, %r511, 1;
shr.u32 %r87, %r421, 1;
shr.u32 %r510, %r421, 2;
setp.gt.u32 %p159, %r511, 2;
mov.u32 %r511, %r87;
@%p159 bra $L__BB4_173;
$L__BB4_174: // %if.end29.i.i.i.i.i
// in Loop: Header=BB4_12 Depth=1
setp.ne.s32 %p207, %r67, 0;
$L__BB4_175: // %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i.i.i
// in Loop: Header=BB4_12 Depth=1
@%p207 bra $L__BB4_177;
// %bb.176: // %.omp.reduction.then.i.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 798 40 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:40
ld.shared.f64 %fd219, [c_ratio_shared8_$_0];
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd220, %fd219, %fd283;
st.shared.f64 [c_ratio_shared8_$_0], %fd220;
$L__tmp299:
$L__BB4_177: // %if.end41.i60.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
bar.sync 0;
@%p105 bra $L__BB4_179;
// %bb.178: // %if.then.i.i.i66.i
// in Loop: Header=BB4_12 Depth=1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r492;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_1], %r492;
mov.u32 %r423, 1;
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_2], %r423;
$L__BB4_179: // %_ZN4ompx5state9ValueRAIIINS0_5ValueIjLNS0_9ValueKindE7EEEjED1Ev.exit.i61.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
bar.sync 0;
bra.uni $L__BB4_224;
$L__BB4_200: // %if.else.i.i.i9.i135.i.i5522.i.i
// in Loop: Header=BB4_12 Depth=1
$L__tmp300:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
setp.eq.s32 %p182, %r98, 1;
ld.shared.u32 %r465, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p183, %r465, 0;
selp.b32 %r466, %r17, %r465, %p183;
selp.b32 %r464, %r466, 1, %p182;
// begin inline asm
barrier.sync %r460, %r464;
// end inline asm
$L__tmp301:
$L__BB4_201: // %__kmpc_barrier.exit18.i98.i.i5452.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.ge.u32 %p184, %r1, %r515;
$L__tmp302:
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
@%p184 bra $L__BB4_203;
// %bb.202: // %then2.i131.i.i5515.i.i
// in Loop: Header=BB4_12 Depth=1
and.b64 %rd194, %rd193, -4294967296;
ld.volatile.shared.u32 %rd414, [%rd7];
or.b64 %rd415, %rd194, %rd414;
mov.b64 %fd293, %rd415;
$L__BB4_203: // %ifcont4.i99.i.i5453.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r467, [IsSPMDMode];
setp.eq.s32 %p185, %r467, 0;
@%p185 bra $L__BB4_205;
// %bb.204: // %if.then2.i.1.i101.i.i5456.i.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
bra.uni $L__BB4_208;
$L__BB4_205: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i.1.i120.i.i5504.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:48
setp.eq.s32 %p186, %r1, %r14;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
ld.shared.u32 %r99, [_ZN4ompx5state9TeamStateE_$_0];
setp.eq.s32 %p187, %r99, 0;
and.pred %p188, %p186, %p187;
@%p188 bra $L__BB4_207;
bra.uni $L__BB4_206;
$L__BB4_207: // %if.then.i.1.i130.i.i5514.i.i
// in Loop: Header=BB4_12 Depth=1
membar.gl;
bra.uni $L__BB4_208;
$L__BB4_206: // %if.else.i.i.i.1.i123.i.i5507.i.i
// in Loop: Header=BB4_12 Depth=1
setp.eq.s32 %p189, %r99, 1;
ld.shared.u32 %r470, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p190, %r470, 0;
selp.b32 %r471, %r17, %r470, %p190;
selp.b32 %r469, %r471, 1, %p189;
// begin inline asm
barrier.sync %r460, %r469;
// end inline asm
$L__BB4_208: // %__kmpc_barrier.exit.1.i102.i.i5457.i.i
// in Loop: Header=BB4_12 Depth=1
mov.b64 %rd195, %fd293;
@%p86 bra $L__BB4_210;
// %bb.209: // %then.1.i119.i.i5501.i.i
// in Loop: Header=BB4_12 Depth=1
{ .reg .b32 tmp; mov.b64 {tmp, %r100}, %rd195; }
st.volatile.shared.u32 [%rd6], %r100;
$L__BB4_210: // %ifcont.1.i103.i.i5458.i.i
// in Loop: Header=BB4_12 Depth=1
ld.shared.u32 %r472, [IsSPMDMode];
setp.eq.s32 %p192, %r472, 0;
@%p192 bra $L__BB4_212;
// %bb.211: // %if.then2.i2.1.i105.i.i5460.i.i
// in Loop: Header=BB4_12 Depth=1
bar.sync 0;
bra.uni $L__BB4_215;
$L__BB4_212: // %_ZN4ompx5state5ValueIjLNS0_9ValueKindE1EEcvjEv.exit.i.i.i3.1.i108.i.i5490.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:48
setp.eq.s32 %p193, %r1, %r14;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
ld.shared.u32 %r101, [_ZN4ompx5state9TeamStateE_$_0];
setp.eq.s32 %p194, %r101, 0;
and.pred %p195, %p193, %p194;
@%p195 bra $L__BB4_214;
bra.uni $L__BB4_213;
$L__BB4_214: // %if.then.i17.1.i118.i.i5500.i.i
// in Loop: Header=BB4_12 Depth=1
membar.gl;
bra.uni $L__BB4_215;
$L__BB4_213: // %if.else.i.i.i9.1.i111.i.i5493.i.i
// in Loop: Header=BB4_12 Depth=1
setp.eq.s32 %p196, %r101, 1;
ld.shared.u32 %r475, [_ZN4ompx5state9TeamStateE_$_2];
setp.eq.s32 %p197, %r475, 0;
selp.b32 %r476, %r17, %r475, %p197;
selp.b32 %r474, %r476, 1, %p196;
// begin inline asm
barrier.sync %r460, %r474;
// end inline asm
$L__BB4_215: // %__kmpc_barrier.exit18.1.i106.i.i5461.i.i
// in Loop: Header=BB4_12 Depth=1
@%p184 bra $L__BB4_217;
$L__tmp303:
// %bb.216: // %_omp_reduction_inter_warp_copy_func.exit.sink.split.i.i5485.i.i
// in Loop: Header=BB4_12 Depth=1
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
ld.volatile.shared.u32 %rd416, [%rd7];
shl.b64 %rd417, %rd416, 32;
and.b64 %rd418, %rd195, 4294967295;
or.b64 %rd419, %rd417, %rd418;
mov.b64 %fd293, %rd419;
$L__tmp304:
$L__BB4_217: // %_omp_reduction_inter_warp_copy_func.exit.i.i5462.i.i
// in Loop: Header=BB4_12 Depth=1
setp.gt.u32 %p199, %r1, 31;
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
setp.lt.u32 %p200, %r89, 64;
or.pred %p201, %p199, %p200;
mov.pred %p208, %p6;
@%p201 bra $L__BB4_220;
// %bb.218: // %while.body.lr.ph.i49.i.i.i5467.i.i
// in Loop: Header=BB4_12 Depth=1
shr.u32 %r514, %r89, 6;
$L__BB4_219: // %while.body.i52.i.i.i5470.i.i
// Parent Loop BB4_12 Depth=1
// => This Inner Loop Header: Depth=2
cvt.u16.u32 %rs12, %r514;
$L__tmp305:
.loc 1 0 0 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:0:0
mov.b64 %rd420, %fd293;
cvt.s32.s16 %r481, %r514;
setp.lt.u16 %p202, %rs1, %rs12;
.loc 1 798 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:48
// begin inline asm
mov.b64 {%r477,%r478}, %rd420;
// end inline asm
shfl.sync.down.b32 %r480, %r478, %r481, 31, -1;
shfl.sync.down.b32 %r479, %r477, %r481, 31, -1;
// begin inline asm
mov.b64 %rd421, {%r479,%r480};
// end inline asm
mov.b64 %fd236, %rd421;
selp.f64 %fd237, %fd293, 0d8000000000000000, %p202;
add.f64 %fd293, %fd237, %fd236;
$L__tmp306:
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
add.s32 %r482, %r515, 1;
shr.u32 %r105, %r482, 1;
shr.u32 %r514, %r482, 2;
setp.gt.u32 %p203, %r515, 2;
mov.u32 %r515, %r105;
mov.pred %p208, %p6;
@%p203 bra $L__BB4_219;
$L__BB4_220: // %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i5435.i.i
// in Loop: Header=BB4_12 Depth=1
// Join point after the reduction paths. When %p208 is set this thread skips
// the accumulate below. %p208 is copied from %p168 or %p6 on the incoming
// edges; both are computed outside this excerpt.
@%p208 bra $L__BB4_222;
// %bb.221: // %.omp.reduction.then.i5438.i.i
// in Loop: Header=BB4_12 Depth=1
// Fold the reduced partial sum %fd293 into the shared-memory accumulator
// c_ratio_shared8_$_0 using a plain load / add / store sequence (no atomic
// instruction is used here).
.loc 1 798 40 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:40
ld.shared.f64 %fd238, [c_ratio_shared8_$_0];
.loc 1 798 36 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:36
add.f64 %fd239, %fd238, %fd293;
st.shared.f64 [c_ratio_shared8_$_0], %fd239;
$L__tmp307:
$L__BB4_222: // %_ZN12_GLOBAL__N_115invokeMicrotaskEiiPvPS0_l.exit4408.i77.i
// in Loop: Header=BB4_12 Depth=1
// Write %r492 into the first TeamState word; the value of %r492 is set
// outside this excerpt.
.loc 1 798 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:798:1
st.shared.u32 [_ZN4ompx5state9TeamStateE_$_0], %r492;
$L__tmp308:
$L__BB4_224: // %__kmpc_parallel_51.exit88.i
// in Loop: Header=BB4_12 Depth=1
// Guarded write-back of the reduction result (source lines 804 and 805).
// %p204 = (%r1 != 0): only the thread with %r1 == 0 performs the two stores
// below. %r1 is assigned outside this excerpt; presumably the thread index
// -- TODO confirm.
.loc 1 783 1 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:783:1
setp.ne.s32 %p204, %r1, 0;
$L__tmp309:
.loc 1 804 29 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:804:29
@%p204 bra $L__BB4_226;
// %bb.225: // %region.guarded27.i
// in Loop: Header=BB4_12 Depth=1
// Line 804: load the index from iw_shared5_$_0, scale it by 8 bytes, add it
// to the base in %rd2 (set outside this excerpt), and store the current
// value of c_ratio_shared8_$_0 to that global address.
.loc 1 804 25 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:804:25
ld.shared.u32 %r488, [iw_shared5_$_0];
.loc 1 804 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:804:7
mul.wide.u32 %rd422, %r488, 8;
add.s64 %rd423, %rd2, %rd422;
.loc 1 804 31 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:804:31
ld.shared.f64 %fd240, [c_ratio_shared8_$_0];
.loc 1 804 29 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:804:29
st.global.f64 [%rd423], %fd240;
$L__BB4_226: // %region.barrier25.i
// in Loop: Header=BB4_12 Depth=1
// Barrier 0 separates the two guarded stores.
bar.sync 0;
.loc 1 805 33 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:33
@%p204 bra $L__BB4_228;
// %bb.227: // %region.guarded32.i
// in Loop: Header=BB4_12 Depth=1
// Line 805: same addressing with the base in %rd1. The index and
// c_ratio_shared8_$_0 are re-read from shared memory after the barrier
// (the earlier %r488 / %fd240 are not reused), and the reciprocal of the
// reloaded value (rcp.rn.f64) is stored.
.loc 1 805 29 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:29
ld.shared.u32 %r489, [iw_shared5_$_0];
.loc 1 805 7 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:7
mul.wide.u32 %rd424, %r489, 8;
add.s64 %rd425, %rd1, %rd424;
.loc 1 805 50 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:50
ld.shared.f64 %fd241, [c_ratio_shared8_$_0];
.loc 1 805 48 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:48
rcp.rn.f64 %fd242, %fd241;
.loc 1 805 33 // qmcpack/src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp:805:33
st.global.f64 [%rd425], %fd242;
bra.uni $L__BB4_228;
$L__tmp310:
$L__func_end4:
// -- End function
}
//--------------------- .text.__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 --------------------------
.section .text.__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783,"ax",@progbits
.sectionflags @"SHF_BARRIERS=9"
.sectioninfo @"SHI_REGISTERS=96"
.align 128
.text.__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783:
.weak __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783
.type __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783,@function
.size __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783,(.L_x_974 - __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783)
.other __omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783,@"STO_CUDA_ENTRY STV_DEFAULT"
__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783:
/*0000*/ MOV R1, c[0x0][0x28] ;
/*0010*/ S2R R22, SR_TID.X ;
/*0020*/ BSSY B0, `(.L_x_600) ;
/*0030*/ IADD3 R1, R1, -0x18, RZ ;
/*0040*/ ISETP.NE.AND P0, PT, RZ, c[0x0][0x160], PT ;
/*0050*/ ISETP.NE.AND P1, PT, R22, RZ, PT ;
/*0060*/ P2R R29, PR, RZ, 0x2 ;
/*0070*/ @!P1 IMAD.MOV.U32 R0, RZ, RZ, 0x1 ;
/*0080*/ @!P1 STS [`(IsSPMDMode)], R0 ;
/*0090*/ @!P1 STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ;
/*00a0*/ @!P1 STS [`(_ZN4ompx5state9TeamStateE_$_1)], RZ ;
/*00b0*/ @!P1 STS [`(_ZN4ompx5state9TeamStateE_$_2)], R0 ;
/*00c0*/ BAR.SYNC 0x0 ;
/*00d0*/ BAR.SYNC 0x0 ;
/*00e0*/ @P1 BRA `(.L_x_601) ;
/*00f0*/ MOV R2, c[0x0][0x168] ;
/*0100*/ IMAD.MOV.U32 R4, RZ, RZ, c[0x0][0x170] ;
/*0110*/ MOV R3, c[0x0][0x16c] ;
/*0120*/ IMAD.MOV.U32 R6, RZ, RZ, c[0x0][0x178] ;
/*0130*/ MOV R5, c[0x0][0x174] ;
/*0140*/ IMAD.MOV.U32 R8, RZ, RZ, c[0x0][0x180] ;
/*0150*/ MOV R7, c[0x0][0x17c] ;
/*0160*/ IMAD.MOV.U32 R10, RZ, RZ, c[0x0][0x188] ;
/*0170*/ MOV R9, c[0x0][0x184] ;
/*0180*/ STS.64 [`($__NumPtcls1_shared2_$_0__2682)], R2 ;
/*0190*/ MOV R11, c[0x0][0x18c] ;
/*01a0*/ IMAD.MOV.U32 R12, RZ, RZ, c[0x0][0x190] ;
/*01b0*/ MOV R13, c[0x0][0x194] ;
/*01c0*/ STS.64 [`($__confgListOccup_ptr2_shared3_$_0__2684)], R4 ;
/*01d0*/ IMAD.MOV.U32 R0, RZ, RZ, c[0x0][0x198] ;
/*01e0*/ MOV R14, c[0x0][0x1a0] ;
/*01f0*/ IMAD.MOV.U32 R15, RZ, RZ, c[0x0][0x1a4] ;
/*0200*/ STS.64 [`($__psiV_temp_list_ptr3_shared4_$_0__2686)], R6 ;
/*0210*/ STS.64 [`($__psiV_list_devptr4_shared6_$_0__2690)], R8 ;
/*0220*/ STS.64 [`($__psiMinv_temp_list_devptr5_shared_$_0__2694)], R10 ;
/*0230*/ STS.64 [`($__psiMinv_cols6_shared_$_0__2696)], R12 ;
/*0240*/ STS [`($__WorkingIndex7_shared7_$_0__2698)], R0 ;
/*0250*/ STS.64 [`($__dpsiV_list_ptr8_shared_$_0__2700)], R14 ;
.L_x_601:
/*0260*/ BSYNC B0 ;
.L_x_600:
/*0270*/ BAR.SYNC 0x0 ;
/*0280*/ @!P1 STS [`($__iw_shared5_$_0__2688)], RZ ;
/*0290*/ BAR.SYNC 0x0 ;
/*02a0*/ @!P0 EXIT ;
/*02b0*/ I2F.U32.RP R0, c[0x0][0xc] ;
/*02c0*/ IMAD.MOV.U32 R24, RZ, RZ, c[0x0][0x160] ;
/*02d0*/ ISETP.NE.U32.AND P2, PT, RZ, c[0x0][0xc], PT ;
/*02e0*/ S2UR UR4, SR_CTAID.X ;
/*02f0*/ MUFU.RCP R0, R0 ;
/*0300*/ IADD3 R2, R0, 0xffffffe, RZ ;
/*0310*/ F2I.FTZ.U32.TRUNC.NTZ R3, R2 ;
/*0320*/ IMAD.MOV.U32 R2, RZ, RZ, RZ ;
/*0330*/ IMAD.MOV R5, RZ, RZ, -R3 ;
/*0340*/ IMAD R5, R5, c[0x0][0xc], RZ ;
/*0350*/ IMAD.HI.U32 R3, R3, R5, R2 ;
/*0360*/ IMAD.HI.U32 R3, R3, c[0x0][0x160], RZ ;
/*0370*/ IMAD.MOV R5, RZ, RZ, -R3 ;
/*0380*/ IMAD R0, R5, c[0x0][0xc], R24 ;
/*0390*/ ISETP.GE.U32.AND P0, PT, R0, c[0x0][0xc], PT ;
/*03a0*/ @P0 IADD3 R0, R0, -c[0x0][0xc], RZ ;
/*03b0*/ @P0 IADD3 R3, R3, 0x1, RZ ;
/*03c0*/ ISETP.GE.U32.AND P1, PT, R0, c[0x0][0xc], PT ;
/*03d0*/ @P1 IADD3 R3, R3, 0x1, RZ ;
/*03e0*/ @!P2 LOP3.LUT R3, RZ, c[0x0][0xc], RZ, 0x33, !PT ;
/*03f0*/ IMAD.MOV R5, RZ, RZ, -R3 ;
/*0400*/ IMAD R0, R5, c[0x0][0xc], R24 ;
/*0410*/ IADD3 R24, R24, -0x1, RZ ;
/*0420*/ ISETP.GT.U32.AND P0, PT, R0, UR4, PT ;
/*0430*/ @P0 IADD3 R23, R3, 0x1, RZ ;
/*0440*/ @P0 IMAD R23, R23, UR4, RZ ;
/*0450*/ @!P0 IMAD R23, R3.reuse, UR4, R0 ;
/*0460*/ @!P0 IADD3 R3, R3, -0x1, RZ ;
/*0470*/ IMAD.IADD R3, R23, 0x1, R3 ;
/*0480*/ IMNMX.U32 R24, R24, R3, PT ;
/*0490*/ ISETP.GT.U32.AND P0, PT, R23, R24, PT ;
/*04a0*/ @P0 EXIT ;
/*04b0*/ ULDC UR4, c[0x0][0x0] ;
/*04c0*/ SHF.R.S32.HI R28, RZ, 0x5, R22.reuse ;
/*04d0*/ UIADD3 UR4, UR4, -0x1, URZ ;
/*04e0*/ LOP3.LUT P1, R25, R22.reuse, 0x1f, RZ, 0xc0, !PT ;
/*04f0*/ IMAD.SHL.U32 R27, R22, 0x4, RZ ;
/*0500*/ SHF.R.U32.HI R26, RZ, 0x5, R22 ;
/*0510*/ ULOP3.LUT UR38, UR4, 0xffffffe0, URZ, 0xc0, !UPT ;
/*0520*/ P2R R31, PR, RZ, 0x2 ;
/*0530*/ IMAD.SHL.U32 R28, R28, 0x4, RZ ;
/*0540*/ ULDC.64 UR36, c[0x0][0x118] ;
/*0550*/ ISETP.NE.AND P0, PT, R22, UR38, PT ;
/*0560*/ P2R R0, PR, RZ, 0x1 ;
.L_x_763:
/*0570*/ ISETP.NE.AND P0, PT, R29, RZ, PT ;
/*0580*/ ULDC UR4, c[0x0][0x0] ;
/*0590*/ UIADD3 UR4, UR4, -0x20, URZ ;
/*05a0*/ IMAD.U32 R30, RZ, RZ, UR4 ;
/*05b0*/ @!P0 STS [`($__iw_shared5_$_0__2688)], R23 ;
/*05c0*/ BAR.SYNC 0x0 ;
/*05d0*/ @!P0 STS.64 [`($__ratioGradRef_local_shared__2692)], RZ ;
/*05e0*/ BAR.SYNC 0x0 ;
/*05f0*/ @!P0 STS.64 [`(($__ratioGradRef_local_shared__2692 + 0x8))], RZ ;
/*0600*/ BAR.SYNC 0x0 ;
/*0610*/ @!P0 STS.64 [`(($__ratioGradRef_local_shared__2692 + 0x10))], RZ ;
/*0620*/ BAR.SYNC 0x0 ;
/*0630*/ LDS R0, [`(IsSPMDMode)] ;
/*0640*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*0650*/ IMAD.MOV.U32 R0, RZ, RZ, R30 ;
/*0660*/ @P0 IMAD.MOV R0, RZ, RZ, c[0x0][0x0] ;
/*0670*/ ISETP.GE.U32.AND P1, PT, R0.reuse, 0x20, PT ;
/*0680*/ LOP3.LUT R2, R0, 0xffffffe0, RZ, 0xc0, !PT ;
/*0690*/ SEL R2, R2, 0x1, P1 ;
/*06a0*/ SEL R3, R2, c[0x0][0x0], !P0 ;
/*06b0*/ ISETP.NE.AND P1, PT, R3, R0, PT ;
/*06c0*/ SEL R0, R3, RZ, P1 ;
/*06d0*/ @!P0 BRA `(.L_x_602) ;
/*06e0*/ BAR.SYNC 0x0 ;
/*06f0*/ ISETP.NE.AND P0, PT, R22, RZ, PT ;
/*0700*/ BSSY B10, `(.L_x_603) ;
/*0710*/ IADD3 R3, R0, -0x1, RZ ;
/*0720*/ P2R R29, PR, RZ, 0x1 ;
/*0730*/ @!P0 IMAD.MOV.U32 R2, RZ, RZ, 0x1 ;
/*0740*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_2)], R0 ;
/*0750*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_1)], R2 ;
/*0760*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_0)], R2 ;
/*0770*/ BAR.SYNC 0x0 ;
/*0780*/ BAR.SYNC 0x0 ;
/*0790*/ ISETP.GE.U32.AND P0, PT, R3, R22, PT ;
/*07a0*/ @!P0 BRA `(.L_x_604) ;
/*07b0*/ LDS.64 R4, [`($__NumPtcls1_shared2_$_0__2682)] ;
/*07c0*/ ISETP.NE.U32.AND P0, PT, R4, RZ, PT ;
/*07d0*/ ISETP.NE.AND.EX P0, PT, R5, RZ, PT, P0 ;
/*07e0*/ @!P0 BRA `(.L_x_604) ;
/*07f0*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*0800*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_1)] ;
/*0810*/ ISETP.NE.AND P1, PT, R0.reuse, 0x1, PT ;
/*0820*/ ISETP.NE.AND P3, PT, R0, RZ, PT ;
/*0830*/ ISETP.NE.AND P0, PT, R3, R0, PT ;
/*0840*/ IADD3 R0, P2, R4, -0x1, RZ ;
/*0850*/ SEL R2, R22, RZ, !P0 ;
/*0860*/ IADD3.X R4, R5, -0x1, RZ, P2, !PT ;
/*0870*/ IMAD.MOV.U32 R5, RZ, RZ, 0x1 ;
/*0880*/ P2R R32, PR, RZ, 0x8 ;
/*0890*/ SEL R33, R2, RZ, P3 ;
/*08a0*/ @P1 BRA `(.L_x_605) ;
/*08b0*/ LDS R5, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*08c0*/ ISETP.NE.AND P0, PT, R5, RZ, PT ;
/*08d0*/ @!P0 LDS R2, [`(IsSPMDMode)] ;
/*08e0*/ ISETP.NE.AND P2, PT, R2, RZ, !P0 ;
/*08f0*/ @!P0 IMAD.MOV.U32 R2, RZ, RZ, R30 ;
/*0900*/ @P2 IADD3 R2, RZ, c[0x0][0x0], RZ ;
/*0910*/ @!P0 IMAD.MOV.U32 R5, RZ, RZ, R2 ;
.L_x_605:
/*0920*/ ISETP.GE.U32.AND P0, PT, R0, R33, PT ;
/*0930*/ BSSY B0, `(.L_x_606) ;
/*0940*/ SHF.R.S32.HI R37, RZ, 0x1f, R33 ;
/*0950*/ CS2R R2, SRZ ;
/*0960*/ CS2R R16, SRZ ;
/*0970*/ CS2R R18, SRZ ;
/*0980*/ ISETP.GE.U32.AND.EX P0, PT, R4, R37, PT, P0 ;
/*0990*/ @!P0 BRA `(.L_x_607) ;
/*09a0*/ LDS R35, [`($__iw_shared5_$_0__2688)] ;
/*09b0*/ LDS.64 R8, [`($__psiMinv_temp_list_devptr5_shared_$_0__2694)] ;
/*09c0*/ LDS.64 R6, [`($__psiV_temp_list_ptr3_shared4_$_0__2686)] ;
/*09d0*/ LDS.64 R2, [`($__psiV_list_devptr4_shared6_$_0__2690)] ;
/*09e0*/ LDS.64 R14, [`($__dpsiV_list_ptr8_shared_$_0__2700)] ;
/*09f0*/ LDS R16, [`($__WorkingIndex7_shared7_$_0__2698)] ;
/*0a00*/ IMAD.WIDE.U32 R12, R35, 0x8, R8 ;
/*0a10*/ LD.E.64 R12, [R12.64] ;
/*0a20*/ IMAD.WIDE.U32 R10, R35, 0x8, R6 ;
/*0a30*/ LD.E.64 R78, [R10.64] ;
/*0a40*/ IMAD.WIDE.U32 R20, R35, 0x8, R2 ;
/*0a50*/ LDS.64 R2, [`($__confgListOccup_ptr2_shared3_$_0__2684)] ;
/*0a60*/ IMAD.WIDE.U32 R34, R35, 0x8, R14 ;
/*0a70*/ LD.E.64 R20, [R20.64] ;
/*0a80*/ LD.E.64 R34, [R34.64] ;
/*0a90*/ IMAD.MOV.U32 R8, RZ, RZ, R33 ;
/*0aa0*/ SHF.R.S32.HI R17, RZ, 0x1f, R16 ;
/*0ab0*/ IMAD.MOV.U32 R7, RZ, RZ, R37 ;
/*0ac0*/ LDS.64 R14, [`($__psiMinv_cols6_shared_$_0__2696)] ;
/*0ad0*/ SHF.R.S32.HI R80, RZ, 0x1f, R5 ;
/*0ae0*/ IMAD.SHL.U32 R81, R5, 0x8, RZ ;
/*0af0*/ MOV R85, R8 ;
/*0b00*/ IMAD.MOV.U32 R87, RZ, RZ, R7.reuse ;
/*0b10*/ SHF.L.U64.HI R19, R8, 0x3, R7 ;
/*0b20*/ SHF.L.U64.HI R83, R5, 0x3, R80 ;
/*0b30*/ IMAD R6, R15, R8.reuse, RZ ;
/*0b40*/ SHF.L.U32 R33, R14.reuse, 0x3, RZ ;
/*0b50*/ IMAD.WIDE.U32 R10, R14, R8, R16 ;
/*0b60*/ IMAD R9, R14, R7, R6 ;
/*0b70*/ IMAD.SHL.U32 R17, R8, 0x8, RZ ;
/*0b80*/ IMAD.WIDE.U32 R36, R33, R5, RZ ;
/*0b90*/ IADD3 R11, R11, R9, RZ ;
/*0ba0*/ LEA R9, P0, R10, R12, 0x3 ;
/*0bb0*/ IADD3 R12, P2, R2, R17, RZ ;
/*0bc0*/ LEA.HI.X R10, R10, R13, R11, 0x3, P0 ;
/*0bd0*/ SHF.L.U64.HI R13, R14, 0x3, R15 ;
/*0be0*/ IADD3 R11, P0, R78, R17, RZ ;
/*0bf0*/ CS2R R16, SRZ ;
/*0c00*/ IMAD R6, R13, R5, RZ ;
/*0c10*/ IMAD.X R78, R79, 0x1, R19, P0 ;
/*0c20*/ IADD3 R82, P0, R8, 0x1, RZ ;
/*0c30*/ IMAD R91, R80, R33, R6 ;
/*0c40*/ IADD3.X R79, R3, R19, RZ, P2, !PT ;
/*0c50*/ CS2R R18, SRZ ;
/*0c60*/ CS2R R2, SRZ ;
/*0c70*/ IADD3.X R89, RZ, R7, RZ, P0, !PT ;
/*0c80*/ IADD3 R91, R37, R91, RZ ;
.L_x_615:
/*0c90*/ ISETP.GE.U32.AND P2, PT, R85, R82, PT ;
/*0ca0*/ BSSY B1, `(.L_x_608) ;
/*0cb0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x80, 0x0 ;
/*0cc0*/ IMAD.MOV.U32 R58, RZ, RZ, R12 ;
/*0cd0*/ ISETP.GE.U32.AND.EX P2, PT, R87, R89, PT, P2 ;
/*0ce0*/ IMAD.MOV.U32 R59, RZ, RZ, R79 ;
/*0cf0*/ MOV R64, R11 ;
/*0d00*/ IMAD.MOV.U32 R65, RZ, RZ, R78 ;
/*0d10*/ MOV R52, R9 ;
/*0d20*/ IMAD.MOV.U32 R53, RZ, RZ, R10 ;
/*0d30*/ MOV R93, R85 ;
/*0d40*/ IMAD.MOV.U32 R6, RZ, RZ, R87 ;
/*0d50*/ @!P2 BRA `(.L_x_609) ;
/*0d60*/ LD.E.64 R44, [R58.64] ;
/*0d70*/ LEA R46, P0, R44, R20, 0x3 ;
/*0d80*/ LEA.HI.X R47, R44, R21, R45, 0x3, P0 ;
/*0d90*/ LD.E.64 R46, [R46.64] ;
/*0da0*/ IMAD R15, R45, 0x18, RZ ;
/*0db0*/ IMAD.WIDE.U32 R44, R44, 0x18, R34 ;
/*0dc0*/ IMAD.MOV.U32 R42, RZ, RZ, R52 ;
/*0dd0*/ IMAD.MOV.U32 R43, RZ, RZ, R53 ;
/*0de0*/ IMAD.IADD R45, R45, 0x1, R15 ;
/*0df0*/ IADD3 R93, P2, R93, 0x1, RZ ;
/*0e00*/ ST.E.64 [R64.64], R46 ;
/*0e10*/ LD.E.64 R42, [R42.64] ;
/*0e20*/ LD.E.64 R40, [R44.64] ;
/*0e30*/ LD.E.64 R38, [R44.64+0x8] ;
/*0e40*/ LD.E.64 R14, [R44.64+0x10] ;
/*0e50*/ IADD3.X R6, RZ, R6, RZ, P2, !PT ;
/*0e60*/ IADD3 R64, P2, R64, 0x8, RZ ;
/*0e70*/ IADD3 R52, P0, R52, R33, RZ ;
/*0e80*/ IMAD.X R65, RZ, RZ, R65, P2 ;
/*0e90*/ IADD3 R58, P2, R58, 0x8, RZ ;
/*0ea0*/ IMAD.X R53, R53, 0x1, R13, P0 ;
/*0eb0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*0ec0*/ IADD3.X R59, RZ, R59, RZ, P2, !PT ;
/*0ed0*/ DFMA R18, R42, R40, R18 ;
/*0ee0*/ DFMA R16, R42, R38, R16 ;
/*0ef0*/ DFMA R2, R42, R14, R2 ;
.L_x_609:
/*0f00*/ BSYNC B1 ;
.L_x_608:
/*0f10*/ IADD3 R14, P2, R82, -R93.reuse, RZ ;
/*0f20*/ BSSY B1, `(.L_x_610) ;
/*0f30*/ ISETP.LE.U32.AND P3, PT, R14, 0x3, PT ;
/*0f40*/ IMAD.X R15, R89, 0x1, ~R6, P2 ;
/*0f50*/ ISETP.GT.U32.AND P2, PT, R82, R93, PT ;
/*0f60*/ ISETP.GT.U32.AND.EX P2, PT, R89, R6, PT, P2 ;
/*0f70*/ ISETP.LE.U32.OR.EX P2, PT, R15, RZ, !P2, P3 ;
/*0f80*/ @P2 BRA `(.L_x_611) ;
/*0f90*/ IADD3 R84, P2, R82, -0x3, RZ ;
/*0fa0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*0fb0*/ IADD3.X R86, R89, -0x1, RZ, P2, !PT ;
.L_x_612:
/*0fc0*/ IMAD.MOV.U32 R42, RZ, RZ, R58 ;
/*0fd0*/ IMAD.MOV.U32 R43, RZ, RZ, R59 ;
/*0fe0*/ LD.E.64 R14, [R42.64] ;
/*0ff0*/ LEA R44, P2, R14, R20, 0x3 ;
/*1000*/ LEA.HI.X R45, R14, R21, R15, 0x3, P2 ;
/*1010*/ LD.E.64 R44, [R44.64] ;
/*1020*/ IMAD.MOV.U32 R40, RZ, RZ, R64 ;
/*1030*/ MOV R41, R65 ;
/*1040*/ ST.E.64 [R40.64], R44 ;
/*1050*/ LD.E.64 R38, [R42.64+0x8] ;
/*1060*/ IMAD R15, R15, 0x18, RZ ;
/*1070*/ IMAD.WIDE.U32 R50, R14, 0x18, R34 ;
/*1080*/ LD.E.64 R64, [R52.64] ;
/*1090*/ LEA R48, P2, R38, R20, 0x3 ;
/*10a0*/ LEA.HI.X R49, R38, R21, R39, 0x3, P2 ;
/*10b0*/ LD.E.64 R48, [R48.64] ;
/*10c0*/ IMAD.IADD R51, R51, 0x1, R15 ;
/*10d0*/ LD.E.64 R70, [R50.64] ;
/*10e0*/ LD.E.64 R68, [R50.64+0x8] ;
/*10f0*/ LD.E.64 R66, [R50.64+0x10] ;
/*1100*/ ST.E.64 [R40.64+0x8], R48 ;
/*1110*/ LD.E.64 R54, [R42.64+0x10] ;
/*1120*/ LEA R44, P2, R54, R20, 0x3 ;
/*1130*/ LEA.HI.X R45, R54, R21, R55, 0x3, P2 ;
/*1140*/ LD.E.64 R44, [R44.64] ;
/*1150*/ IADD3 R14, P2, R52, R33, RZ ;
/*1160*/ IMAD R39, R39, 0x18, RZ ;
/*1170*/ IMAD.WIDE.U32 R46, R38, 0x18, R34 ;
/*1180*/ IMAD.X R15, R53, 0x1, R13, P2 ;
/*1190*/ IMAD.IADD R47, R47, 0x1, R39 ;
/*11a0*/ LD.E.64 R58, [R14.64] ;
/*11b0*/ LD.E.64 R62, [R46.64] ;
/*11c0*/ LD.E.64 R38, [R46.64+0x8] ;
/*11d0*/ LD.E.64 R60, [R46.64+0x10] ;
/*11e0*/ ST.E.64 [R40.64+0x10], R44 ;
/*11f0*/ LD.E.64 R72, [R42.64+0x18] ;
/*1200*/ IMAD R49, R55, 0x18, RZ ;
/*1210*/ IMAD.WIDE.U32 R54, R54, 0x18, R34 ;
/*1220*/ IADD3 R55, R55, R49, RZ ;
/*1230*/ LD.E.64 R44, [R54.64] ;
/*1240*/ LD.E.64 R56, [R54.64+0x8] ;
/*1250*/ LD.E.64 R54, [R54.64+0x10] ;
/*1260*/ LEA R76, P2, R72, R20, 0x3 ;
/*1270*/ LEA.HI.X R77, R72, R21, R73, 0x3, P2 ;
/*1280*/ LD.E.64 R76, [R76.64] ;
/*1290*/ IADD3 R74, P2, R14, R33, RZ ;
/*12a0*/ IMAD.X R75, R15, 0x1, R13, P2 ;
/*12b0*/ LD.E.64 R52, [R74.64] ;
/*12c0*/ IMAD R15, R73, 0x18, RZ ;
/*12d0*/ IADD3 R14, P2, R74, R33, RZ ;
/*12e0*/ IMAD.WIDE.U32 R72, R72, 0x18, R34 ;
/*12f0*/ IMAD.IADD R73, R73, 0x1, R15 ;
/*1300*/ IADD3.X R15, R75, R13, RZ, P2, !PT ;
/*1310*/ DFMA R70, R64, R70, R18 ;
/*1320*/ DFMA R68, R64, R68, R16 ;
/*1330*/ DFMA R66, R64, R66, R2 ;
/*1340*/ IADD3 R64, P2, R40, 0x20, RZ ;
/*1350*/ DFMA R62, R58.reuse, R62, R70 ;
/*1360*/ ST.E.64 [R40.64+0x18], R76 ;
/*1370*/ LD.E.64 R48, [R14.64] ;
/*1380*/ LD.E.64 R46, [R72.64] ;
/*1390*/ LD.E.64 R50, [R72.64+0x8] ;
/*13a0*/ LD.E.64 R74, [R72.64+0x10] ;
/*13b0*/ DFMA R38, R58, R38, R68 ;
/*13c0*/ IMAD.X R65, RZ, RZ, R41, P2 ;
/*13d0*/ DFMA R60, R58, R60, R66 ;
/*13e0*/ IADD3 R58, P2, R42, 0x20, RZ ;
/*13f0*/ IMAD.X R59, RZ, RZ, R43, P2 ;
/*1400*/ IADD3 R93, P2, R93, 0x4, RZ ;
/*1410*/ DFMA R44, R52, R44, R62 ;
/*1420*/ IADD3.X R6, RZ, R6, RZ, P2, !PT ;
/*1430*/ DFMA R38, R52, R56, R38 ;
/*1440*/ DFMA R54, R52, R54, R60 ;
/*1450*/ IADD3 R52, P2, R14, R33, RZ ;
/*1460*/ IMAD.X R53, R15, 0x1, R13, P2 ;
/*1470*/ ISETP.GE.U32.AND P2, PT, R93, R84, PT ;
/*1480*/ ISETP.GE.U32.AND.EX P2, PT, R6, R86, PT, P2 ;
/*1490*/ DFMA R18, R48, R46, R44 ;
/*14a0*/ DFMA R16, R48, R50, R38 ;
/*14b0*/ DFMA R2, R48, R74, R54 ;
/*14c0*/ @!P2 BRA `(.L_x_612) ;
.L_x_611:
/*14d0*/ BSYNC B1 ;
.L_x_610:
/*14e0*/ IADD3 R14, P2, R82, -R93.reuse, RZ ;
/*14f0*/ BSSY B1, `(.L_x_613) ;
/*1500*/ ISETP.LE.U32.AND P3, PT, R14, 0x1, PT ;
/*1510*/ IMAD.X R15, R89, 0x1, ~R6, P2 ;
/*1520*/ ISETP.GT.U32.AND P2, PT, R82, R93, PT ;
/*1530*/ ISETP.GT.U32.AND.EX P2, PT, R89, R6, PT, P2 ;
/*1540*/ ISETP.LE.U32.OR.EX P2, PT, R15, RZ, !P2, P3 ;
/*1550*/ @P2 BRA `(.L_x_614) ;
/*1560*/ LD.E.64 R60, [R58.64] ;
/*1570*/ LEA R38, P0, R60, R20, 0x3 ;
/*1580*/ LEA.HI.X R39, R60, R21, R61, 0x3, P0 ;
/*1590*/ LD.E.64 R38, [R38.64] ;
/*15a0*/ ST.E.64 [R64.64], R38 ;
/*15b0*/ LD.E.64 R54, [R58.64+0x8] ;
/*15c0*/ IMAD R15, R61, 0x18, RZ ;
/*15d0*/ IMAD.WIDE.U32 R60, R60, 0x18, R34 ;
/*15e0*/ LEA R56, P0, R54, R20, 0x3 ;
/*15f0*/ LEA.HI.X R57, R54, R21, R55, 0x3, P0 ;
/*1600*/ LD.E.64 R56, [R56.64] ;
/*1610*/ IADD3 R14, P0, R52, R33, RZ ;
/*1620*/ IMAD.IADD R61, R61, 0x1, R15 ;
/*1630*/ IMAD R41, R55, 0x18, RZ ;
/*1640*/ IMAD.WIDE.U32 R54, R54, 0x18, R34 ;
/*1650*/ LD.E.64 R50, [R60.64] ;
/*1660*/ IMAD.X R15, R53, 0x1, R13, P0 ;
/*1670*/ LD.E.64 R48, [R60.64+0x8] ;
/*1680*/ IMAD.IADD R55, R55, 0x1, R41 ;
/*1690*/ LD.E.64 R52, [R52.64] ;
/*16a0*/ LD.E.64 R46, [R60.64+0x10] ;
/*16b0*/ ST.E.64 [R64.64+0x8], R56 ;
/*16c0*/ LD.E.64 R44, [R14.64] ;
/*16d0*/ LD.E.64 R38, [R54.64] ;
/*16e0*/ LD.E.64 R40, [R54.64+0x8] ;
/*16f0*/ LD.E.64 R42, [R54.64+0x10] ;
/*1700*/ IADD3 R93, P2, R93, 0x2, RZ ;
/*1710*/ DFMA R18, R52, R50, R18 ;
/*1720*/ IADD3.X R6, RZ, R6, RZ, P2, !PT ;
/*1730*/ IADD3 R64, P2, R64, 0x10, RZ ;
/*1740*/ DFMA R16, R52, R48, R16 ;
/*1750*/ DFMA R2, R52, R46, R2 ;
/*1760*/ IMAD.X R65, RZ, RZ, R65, P2 ;
/*1770*/ IADD3 R52, P0, R14, R33, RZ ;
/*1780*/ IADD3 R58, P2, R58, 0x10, RZ ;
/*1790*/ IMAD.X R53, R15, 0x1, R13, P0 ;
/*17a0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*17b0*/ IADD3.X R59, RZ, R59, RZ, P2, !PT ;
/*17c0*/ DFMA R18, R44, R38, R18 ;
/*17d0*/ DFMA R16, R44, R40, R16 ;
/*17e0*/ DFMA R2, R44, R42, R2 ;
.L_x_614:
/*17f0*/ BSYNC B1 ;
.L_x_613:
/*1800*/ ISETP.LT.U32.AND P2, PT, R93, R82, PT ;
/*1810*/ ISETP.LT.U32.OR.EX P0, PT, R6, R89, P0, P2 ;
/*1820*/ @P0 LD.E.64 R14, [R58.64] ;
/*1830*/ @P0 LEA R38, P2, R14, R20, 0x3 ;
/*1840*/ @P0 LEA.HI.X R39, R14, R21, R15, 0x3, P2 ;
/*1850*/ @P0 LD.E.64 R38, [R38.64] ;
/*1860*/ @P0 IMAD R41, R15, 0x18, RZ ;
/*1870*/ @P0 IMAD.WIDE.U32 R14, R14, 0x18, R34 ;
/*1880*/ @P0 IMAD.IADD R15, R15, 0x1, R41 ;
/*1890*/ @P0 ST.E.64 [R64.64], R38 ;
/*18a0*/ @P0 LD.E.64 R40, [R52.64] ;
/*18b0*/ @P0 LD.E.64 R42, [R14.64] ;
/*18c0*/ @P0 LD.E.64 R44, [R14.64+0x8] ;
/*18d0*/ @P0 LD.E.64 R46, [R14.64+0x10] ;
/*18e0*/ @P0 DFMA R18, R40, R42, R18 ;
/*18f0*/ IADD3 R43, P2, R5, R8, RZ ;
/*1900*/ @P0 DFMA R16, R40, R44, R16 ;
/*1910*/ IMAD.X R7, R80, 0x1, R7, P2 ;
/*1920*/ @P0 DFMA R2, R40, R46, R2 ;
/*1930*/ ISETP.LT.U32.AND P0, PT, R43, R0, PT ;
/*1940*/ IADD3 R85, P2, R5, R85, RZ ;
/*1950*/ ISETP.LT.U32.AND.EX P0, PT, R7, R4, PT, P0 ;
/*1960*/ IMAD.X R87, R80, 0x1, R87, P2 ;
/*1970*/ IADD3 R9, P2, R9, R36, RZ ;
/*1980*/ SEL R8, R43, R0, P0 ;
/*1990*/ SEL R7, R7, R4, P0 ;
/*19a0*/ IMAD.X R10, R10, 0x1, R91, P2 ;
/*19b0*/ IADD3 R11, P0, R11, R81, RZ ;
/*19c0*/ IADD3 R82, P2, R8, 0x1, RZ ;
/*19d0*/ IMAD.X R78, R78, 0x1, R83, P0 ;
/*19e0*/ ISETP.GE.U32.AND P0, PT, R85, R82, PT ;
/*19f0*/ IMAD.X R89, RZ, RZ, R7, P2 ;
/*1a00*/ IADD3 R12, P2, R12, R81, RZ ;
/*1a10*/ ISETP.GE.U32.AND.EX P0, PT, R87, R89, PT, P0 ;
/*1a20*/ IMAD.X R79, R79, 0x1, R83, P2 ;
/*1a30*/ @!P0 BRA `(.L_x_615) ;
.L_x_607:
/*1a40*/ BSYNC B0 ;
.L_x_606:
/*1a50*/ STL.64 [R1], R18 ;
/*1a60*/ MOV R33, 0x1 ;
/*1a70*/ STL.64 [R1+0x8], R16 ;
/*1a80*/ STL.64 [R1+0x10], R2 ;
/*1a90*/ @P1 BRA `(.L_x_616) ;
/*1aa0*/ LDS R33, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*1ab0*/ ISETP.NE.AND P0, PT, R33, RZ, PT ;
/*1ac0*/ @!P0 LDS R0, [`(IsSPMDMode)] ;
/*1ad0*/ ISETP.NE.AND P1, PT, R0, RZ, !P0 ;
/*1ae0*/ @!P0 IMAD.MOV.U32 R0, RZ, RZ, c[0x0][0x0] ;
/*1af0*/ @!P0 IADD3 R30, R0, -0x20, RZ ;
/*1b00*/ @!P0 IMAD.MOV.U32 R0, RZ, RZ, R30 ;
/*1b10*/ @P1 IADD3 R0, RZ, c[0x0][0x0], RZ ;
/*1b20*/ @!P0 IMAD.MOV.U32 R33, RZ, RZ, R0 ;
.L_x_616:
/*1b30*/ ISETP.NE.AND P0, PT, R33, 0x1, PT ;
/*1b40*/ BSSY B9, `(.L_x_617) ;
/*1b50*/ IMAD.MOV.U32 R40, RZ, RZ, R2 ;
/*1b60*/ MOV R41, R3 ;
/*1b70*/ @!P0 BRA `(.L_x_618) ;
/*1b80*/ IADD3 R42, R33.reuse, 0x1f, RZ ;
/*1b90*/ BSSY B7, `(.L_x_619) ;
/*1ba0*/ LOP3.LUT P0, R44, R33, 0x1f, RZ, 0xc0, !PT ;
/*1bb0*/ IMAD.MOV.U32 R34, RZ, RZ, R40 ;
/*1bc0*/ SHF.R.U32.HI R43, RZ, 0x5, R42 ;
/*1bd0*/ IMAD.MOV.U32 R35, RZ, RZ, R41 ;
/*1be0*/ IADD3 R3, R43, -0x1, RZ ;
/*1bf0*/ ISETP.GE.U32.AND P1, PT, R26, R3, PT ;
/*1c00*/ @P0 BRA P1, `(.L_x_620) ;
/*1c10*/ BRA.DIV ~URZ, `(.L_x_621) ;
/*1c20*/ SHFL.DOWN PT, R3, R19, 0x10, 0x1f ;
/*1c30*/ SHFL.DOWN PT, R2, R18, 0x10, 0x1f ;
/*1c40*/ SHFL.DOWN PT, R5, R17, 0x10, 0x1f ;
/*1c50*/ SHFL.DOWN PT, R4, R16, 0x10, 0x1f ;
/*1c60*/ SHFL.DOWN PT, R7, R35, 0x10, 0x1f ;
/*1c70*/ SHFL.DOWN PT, R6, R34, 0x10, 0x1f ;
/*1c80*/ DADD R2, R18, R2 ;
/*1c90*/ SHFL.DOWN PT, R9, R3, 0x8, 0x1f ;
/*1ca0*/ DADD R4, R16, R4 ;
/*1cb0*/ SHFL.DOWN PT, R8, R2, 0x8, 0x1f ;
/*1cc0*/ SHFL.DOWN PT, R11, R5, 0x8, 0x1f ;
/*1cd0*/ DADD R6, R6, R40 ;
/*1ce0*/ SHFL.DOWN PT, R10, R4, 0x8, 0x1f ;
/*1cf0*/ SHFL.DOWN PT, R13, R7, 0x8, 0x1f ;
/*1d00*/ SHFL.DOWN PT, R12, R6, 0x8, 0x1f ;
/*1d10*/ DADD R8, R2, R8 ;
/*1d20*/ SHFL.DOWN PT, R15, R9, 0x4, 0x1f ;
/*1d30*/ DADD R10, R4, R10 ;
/*1d40*/ SHFL.DOWN PT, R14, R8, 0x4, 0x1f ;
/*1d50*/ SHFL.DOWN PT, R17, R11, 0x4, 0x1f ;
/*1d60*/ DADD R12, R6, R12 ;
/*1d70*/ SHFL.DOWN PT, R16, R10, 0x4, 0x1f ;
/*1d80*/ SHFL.DOWN PT, R3, R13, 0x4, 0x1f ;
/*1d90*/ SHFL.DOWN PT, R2, R12, 0x4, 0x1f ;
/*1da0*/ DADD R14, R8, R14 ;
/*1db0*/ SHFL.DOWN PT, R7, R15, 0x2, 0x1f ;
/*1dc0*/ DADD R4, R10, R16 ;
/*1dd0*/ SHFL.DOWN PT, R6, R14, 0x2, 0x1f ;
/*1de0*/ SHFL.DOWN PT, R35, R5, 0x2, 0x1f ;
/*1df0*/ DADD R2, R12, R2 ;
/*1e00*/ SHFL.DOWN PT, R34, R4, 0x2, 0x1f ;
/*1e10*/ SHFL.DOWN PT, R37, R3, 0x2, 0x1f ;
/*1e20*/ SHFL.DOWN PT, R36, R2, 0x2, 0x1f ;
/*1e30*/ DADD R44, R14, R6 ;
/*1e40*/ SHFL.DOWN PT, R17, R45, 0x1, 0x1f ;
/*1e50*/ DADD R34, R4, R34 ;
/*1e60*/ SHFL.DOWN PT, R16, R44, 0x1, 0x1f ;
/*1e70*/ SHFL.DOWN PT, R39, R35, 0x1, 0x1f ;
/*1e80*/ DADD R36, R2, R36 ;
/*1e90*/ SHFL.DOWN PT, R38, R34, 0x1, 0x1f ;
/*1ea0*/ SHFL.DOWN PT, R41, R37, 0x1, 0x1f ;
/*1eb0*/ SHFL.DOWN PT, R4, R36, 0x1, 0x1f ;
.L_x_764:
/*1ec0*/ MOV R5, R41 ;
/*1ed0*/ DADD R18, R16, R44 ;
/*1ee0*/ DADD R16, R38, R34 ;
/*1ef0*/ STL.64 [R1], R18 ;
/*1f00*/ DADD R36, R4, R36 ;
/*1f10*/ STL.64 [R1+0x8], R16 ;
/*1f20*/ STL.64 [R1+0x10], R36 ;
/*1f30*/ BRA `(.L_x_622) ;
.L_x_620:
/*1f40*/ ISETP.GE.U32.AND P0, PT, R44, 0x2, PT ;
/*1f50*/ @!P0 BRA `(.L_x_622) ;
/*1f60*/ BSSY B6, `(.L_x_623) ;
/*1f70*/ SHF.R.U32.HI R37, RZ, 0x1, R44 ;
.L_x_625:
/*1f80*/ BRA.DIV ~URZ, `(.L_x_624) ;
/*1f90*/ SHFL.DOWN PT, R41, R19, R37, 0x1f ;
/*1fa0*/ SHFL.DOWN PT, R40, R18, R37, 0x1f ;
/*1fb0*/ SHFL.DOWN PT, R39, R17, R37, 0x1f ;
/*1fc0*/ SHFL.DOWN PT, R38, R16, R37, 0x1f ;
/*1fd0*/ SHFL.DOWN PT, R36, R35, R37, 0x1f ;
/*1fe0*/ SHFL.DOWN PT, R2, R34, R37, 0x1f ;
.L_x_765:
/*1ff0*/ LOP3.LUT R0, R37, 0xffff, RZ, 0xc0, !PT ;
/*2000*/ IMAD.MOV.U32 R3, RZ, RZ, R36 ;
/*2010*/ IADD3 R37, R44, 0x1, RZ ;
/*2020*/ ISETP.GE.U32.AND P0, PT, R25, R0, PT ;
/*2030*/ SHF.R.U32.HI R0, RZ, 0x1, R37.reuse ;
/*2040*/ SHF.R.U32.HI R37, RZ, 0x2, R37 ;
/*2050*/ @!P0 DADD R40, R40, R18 ;
/*2060*/ @!P0 DADD R38, R38, R16 ;
/*2070*/ @!P0 DADD R2, R2, R34 ;
/*2080*/ ISETP.GT.U32.AND P0, PT, R44, 0x2, PT ;
/*2090*/ IMAD.MOV.U32 R44, RZ, RZ, R0 ;
/*20a0*/ MOV R18, R40 ;
/*20b0*/ IMAD.MOV.U32 R19, RZ, RZ, R41 ;
/*20c0*/ IMAD.MOV.U32 R16, RZ, RZ, R38 ;
/*20d0*/ IMAD.MOV.U32 R17, RZ, RZ, R39 ;
/*20e0*/ IMAD.MOV.U32 R34, RZ, RZ, R2 ;
/*20f0*/ MOV R35, R3 ;
/*2100*/ @P0 BRA `(.L_x_625) ;
/*2110*/ BSYNC B6 ;
.L_x_623:
/*2120*/ STL.64 [R1+0x10], R2 ;
/*2130*/ IMAD.MOV.U32 R16, RZ, RZ, R38 ;
/*2140*/ MOV R18, R40 ;
/*2150*/ IMAD.MOV.U32 R17, RZ, RZ, R39 ;
/*2160*/ STL.64 [R1], R40 ;
/*2170*/ MOV R19, R41 ;
/*2180*/ STL.64 [R1+0x8], R38 ;
.L_x_622:
/*2190*/ BSYNC B7 ;
.L_x_619:
/*21a0*/ ISETP.GE.U32.AND P1, PT, R33, 0x21, PT ;
/*21b0*/ BSSY B8, `(.L_x_626) ;
/*21c0*/ ISETP.NE.AND P2, PT, R32, RZ, PT ;
/*21d0*/ ISETP.NE.AND P0, PT, R22.reuse, UR38, PT ;
/*21e0*/ SEL R3, R22, RZ, P2 ;
/*21f0*/ SEL R38, R3, R22, !P0 ;
/*2200*/ @!P1 BRA `(.L_x_627) ;
/*2210*/ BSSY B7, `(.L_x_628) ;
/*2220*/ MOV R2, RZ ;
.L_x_641:
/*2230*/ LDS R0, [`(IsSPMDMode)] ;
/*2240*/ YIELD (*"RELOCATOR OPCODE,YIELD,280"*);
/*2250*/ BSSY B6, `(.L_x_629) ;
/*2260*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*2270*/ @P0 BRA `(.L_x_630) ;
/*2280*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*2290*/ ISETP.NE.AND P1, PT, R22, UR38, PT ;
/*22a0*/ ISETP.NE.OR P0, PT, R0, RZ, P1 ;
/*22b0*/ @P0 BRA `(.L_x_631) ;
/*22c0*/ @!PT LDS RZ, [RZ] ;
/*22d0*/ @!PT LDS RZ, [RZ] ;
/*22e0*/ @!PT LDS RZ, [RZ] ;
/*22f0*/ @!PT LDS RZ, [RZ] ;
/*2300*/ MEMBAR.SC.GPU ;
/*2310*/ ERRBAR;
/*2320*/ CCTL.IVALL ;
/*2330*/ BRA `(.L_x_632) ;
.L_x_631:
/*2340*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*2350*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*2360*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*2370*/ SEL R5, R30, R3, !P1 ;
/*2380*/ SEL R5, R5, 0x1, !P0 ;
/*2390*/ BRA.DIV ~URZ, `(.L_x_633) ;
/*23a0*/ BAR.SYNC 0x7, R5 ;
/*23b0*/ BRA `(.L_x_632) ;
.L_x_630:
/*23c0*/ WARPSYNC 0xffffffff ;
/*23d0*/ BAR.SYNC 0x0 ;
.L_x_632:
/*23e0*/ BSYNC B6 ;
.L_x_629:
/*23f0*/ ISETP.NE.AND P0, PT, R31, RZ, PT ;
/*2400*/ BSSY B0, `(.L_x_634) ;
/*2410*/ IADD3 R33, R1, R2, RZ ;
/*2420*/ @P0 BRA `(.L_x_635) ;
/*2430*/ LDL R3, [R33] ;
/*2440*/ STS [R28+`(__openmp_nvptx_data_transfer_temporary_storage)], R3 ;
.L_x_635:
/*2450*/ BSYNC B0 ;
.L_x_634:
/*2460*/ LDS R0, [`(IsSPMDMode)] ;
/*2470*/ BSSY B6, `(.L_x_636) ;
/*2480*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*2490*/ @P0 BRA `(.L_x_637) ;
/*24a0*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*24b0*/ ISETP.NE.AND P1, PT, R22, UR38, PT ;
/*24c0*/ ISETP.NE.OR P0, PT, R0, RZ, P1 ;
/*24d0*/ @P0 BRA `(.L_x_638) ;
/*24e0*/ @!PT LDS RZ, [RZ] ;
/*24f0*/ @!PT LDS RZ, [RZ] ;
/*2500*/ @!PT LDS RZ, [RZ] ;
/*2510*/ @!PT LDS RZ, [RZ] ;
/*2520*/ MEMBAR.SC.GPU ;
/*2530*/ ERRBAR;
/*2540*/ CCTL.IVALL ;
/*2550*/ BRA `(.L_x_639) ;
.L_x_638:
/*2560*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*2570*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*2580*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*2590*/ SEL R5, R30, R3, !P1 ;
/*25a0*/ SEL R5, R5, 0x1, !P0 ;
/*25b0*/ BRA.DIV ~URZ, `(.L_x_640) ;
/*25c0*/ BAR.SYNC 0x7, R5 ;
/*25d0*/ BRA `(.L_x_639) ;
.L_x_637:
/*25e0*/ WARPSYNC 0xffffffff ;
/*25f0*/ BAR.SYNC 0x0 ;
.L_x_639:
/*2600*/ BSYNC B6 ;
.L_x_636:
/*2610*/ ISETP.GE.U32.AND P0, PT, R22, R43, PT ;
/*2620*/ IADD3 R2, R2, 0x4, RZ ;
/*2630*/ @!P0 LDS R0, [R27+`(__openmp_nvptx_data_transfer_temporary_storage)] ;
/*2640*/ @!P0 STL [R33], R0 ;
/*2650*/ ISETP.NE.AND P0, PT, R2, 0x18, PT ;
/*2660*/ @P0 BRA `(.L_x_641) ;
/*2670*/ BSYNC B7 ;
.L_x_628:
/*2680*/ ISETP.GE.U32.AND P0, PT, R42, 0x40, PT ;
/*2690*/ ISETP.GT.U32.OR P0, PT, R22, 0x1f, !P0 ;
/*26a0*/ @P0 BRA `(.L_x_627) ;
/*26b0*/ LDL.64 R16, [R1+0x10] ;
/*26c0*/ LDL.64 R18, [R1] ;
/*26d0*/ LDL.64 R32, [R1+0x8] ;
/*26e0*/ BSSY B6, `(.L_x_642) ;
/*26f0*/ SHF.R.U32.HI R2, RZ, 0x6, R42 ;
.L_x_644:
/*2700*/ PRMT R39, R2, 0x9910, RZ ;
/*2710*/ BRA.DIV ~URZ, `(.L_x_643) ;
/*2720*/ SHFL.DOWN PT, R37, R19, R39, 0x1f ;
/*2730*/ SHFL.DOWN PT, R36, R18, R39, 0x1f ;
/*2740*/ SHFL.DOWN PT, R35, R33, R39, 0x1f ;
/*2750*/ SHFL.DOWN PT, R34, R32, R39, 0x1f ;
/*2760*/ SHFL.DOWN PT, R30, R17, R39, 0x1f ;
/*2770*/ SHFL.DOWN PT, R4, R16, R39, 0x1f ;
.L_x_766:
/*2780*/ LOP3.LUT R2, R2, 0xffff, RZ, 0xc0, !PT ;
/*2790*/ IMAD.MOV.U32 R5, RZ, RZ, R30 ;
/*27a0*/ LOP3.LUT R3, R38, 0xffff, RZ, 0xc0, !PT ;
/*27b0*/ ISETP.GE.U32.AND P0, PT, R3, R2, PT ;
/*27c0*/ IADD3 R2, R43, 0x1, RZ ;
/*27d0*/ SHF.R.U32.HI R0, RZ, 0x1, R2.reuse ;
/*27e0*/ SHF.R.U32.HI R2, RZ, 0x2, R2 ;
/*27f0*/ @!P0 DADD R36, R36, R18 ;
/*2800*/ @!P0 DADD R34, R34, R32 ;
/*2810*/ @!P0 DADD R4, R4, R16 ;
/*2820*/ ISETP.GT.U32.AND P0, PT, R43, 0x2, PT ;
/*2830*/ IMAD.MOV.U32 R43, RZ, RZ, R0 ;
/*2840*/ MOV R18, R36 ;
/*2850*/ IMAD.MOV.U32 R19, RZ, RZ, R37 ;
/*2860*/ IMAD.MOV.U32 R32, RZ, RZ, R34 ;
/*2870*/ IMAD.MOV.U32 R33, RZ, RZ, R35 ;
/*2880*/ IMAD.MOV.U32 R16, RZ, RZ, R4 ;
/*2890*/ MOV R17, R5 ;
/*28a0*/ @P0 BRA `(.L_x_644) ;
/*28b0*/ BSYNC B6 ;
.L_x_642:
/*28c0*/ STL.64 [R1+0x10], R4 ;
/*28d0*/ MOV R16, R32 ;
/*28e0*/ MOV R17, R33 ;
.L_x_627:
/*28f0*/ BSYNC B8 ;
.L_x_626:
/*2900*/ ISETP.NE.AND P0, PT, R38, RZ, PT ;
/*2910*/ @P0 BREAK B9 ;
/*2920*/ @P0 BRA `(.L_x_604) ;
/*2930*/ LDL.64 R40, [R1+0x10] ;
.L_x_618:
/*2940*/ BSYNC B9 ;
.L_x_617:
/*2950*/ LDS.64 R2, [`($__ratioGradRef_local_shared__2692)] ;
/*2960*/ LDS.64 R6, [`(($__ratioGradRef_local_shared__2692 + 0x8))] ;
/*2970*/ LDS.64 R4, [`(($__ratioGradRef_local_shared__2692 + 0x10))] ;
/*2980*/ DADD R2, R2, R18 ;
/*2990*/ DADD R16, R16, R6 ;
/*29a0*/ STS.64 [`($__ratioGradRef_local_shared__2692)], R2 ;
/*29b0*/ DADD R4, R4, R40 ;
/*29c0*/ STS.64 [`(($__ratioGradRef_local_shared__2692 + 0x8))], R16 ;
/*29d0*/ STS.64 [`(($__ratioGradRef_local_shared__2692 + 0x10))], R4 ;
.L_x_604:
/*29e0*/ BSYNC B10 ;
.L_x_603:
/*29f0*/ ISETP.NE.AND P0, PT, R29, RZ, PT ;
/*2a00*/ WARPSYNC 0xffffffff ;
/*2a10*/ BAR.SYNC 0x0 ;
/*2a20*/ @!P0 MOV R0, 0x1 ;
/*2a30*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ;
/*2a40*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_1)], RZ ;
/*2a50*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_2)], R0 ;
/*2a60*/ BAR.SYNC 0x0 ;
/*2a70*/ BAR.SYNC 0x0 ;
/*2a80*/ BRA `(.L_x_645) ;
.L_x_602:
/*2a90*/ ISETP.GT.U32.AND P0, PT, R2, 0x1, PT ;
/*2aa0*/ @P0 BRA `(.L_x_646) ;
/*2ab0*/ IMAD.MOV.U32 R0, RZ, RZ, 0x1 ;
/*2ac0*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], R0 ;
/*2ad0*/ LDS.64 R4, [`($__NumPtcls1_shared2_$_0__2682)] ;
/*2ae0*/ ISETP.NE.U32.AND P0, PT, R4, RZ, PT ;
/*2af0*/ ISETP.NE.AND.EX P0, PT, R5, RZ, PT, P0 ;
/*2b00*/ @!P0 BRA `(.L_x_647) ;
/*2b10*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_1)] ;
/*2b20*/ BSSY B0, `(.L_x_648) ;
/*2b30*/ CS2R R2, SRZ ;
/*2b40*/ CS2R R18, SRZ ;
/*2b50*/ LDS R7, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*2b60*/ CS2R R16, SRZ ;
/*2b70*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*2b80*/ IADD3 R0, P1, R4, -0x1, RZ ;
/*2b90*/ SEL R21, R22, RZ, !P0 ;
/*2ba0*/ IADD3.X R4, R5, -0x1, RZ, P1, !PT ;
/*2bb0*/ ISETP.GE.U32.AND P0, PT, R0, R21, PT ;
/*2bc0*/ SHF.R.S32.HI R5, RZ, 0x1f, R21 ;
/*2bd0*/ ISETP.NE.AND P1, PT, R7, RZ, PT ;
/*2be0*/ ISETP.GE.U32.AND.EX P0, PT, R4, R5, PT, P0 ;
/*2bf0*/ SEL R32, R30, R7, !P1 ;
/*2c00*/ @!P0 BRA `(.L_x_649) ;
/*2c10*/ LDS R37, [`($__iw_shared5_$_0__2688)] ;
/*2c20*/ LDS.64 R8, [`($__psiMinv_temp_list_devptr5_shared_$_0__2694)] ;
/*2c30*/ LDS.64 R6, [`($__psiV_temp_list_ptr3_shared4_$_0__2686)] ;
/*2c40*/ LDS.64 R2, [`($__psiV_list_devptr4_shared6_$_0__2690)] ;
/*2c50*/ LDS.64 R12, [`($__dpsiV_list_ptr8_shared_$_0__2700)] ;
/*2c60*/ LDS R18, [`($__WorkingIndex7_shared7_$_0__2698)] ;
/*2c70*/ LDS.64 R80, [`($__confgListOccup_ptr2_shared3_$_0__2684)] ;
/*2c80*/ IMAD.WIDE.U32 R10, R37, 0x8, R8 ;
/*2c90*/ IMAD.WIDE.U32 R8, R37.reuse, 0x8, R6 ;
/*2ca0*/ LD.E.64 R16, [R10.64] ;
/*2cb0*/ LD.E.64 R14, [R8.64] ;
/*2cc0*/ IMAD.WIDE.U32 R2, R37, 0x8, R2 ;
/*2cd0*/ IMAD.WIDE.U32 R36, R37, 0x8, R12 ;
/*2ce0*/ LD.E.64 R34, [R2.64] ;
/*2cf0*/ LDS.64 R12, [`($__psiMinv_cols6_shared_$_0__2696)] ;
/*2d00*/ LD.E.64 R36, [R36.64] ;
/*2d10*/ IMAD.MOV.U32 R7, RZ, RZ, R21 ;
/*2d20*/ SHF.R.S32.HI R19, RZ, 0x1f, R18 ;
/*2d30*/ IMAD.MOV.U32 R6, RZ, RZ, R5 ;
/*2d40*/ IMAD.SHL.U32 R82, R32, 0x8, RZ ;
/*2d50*/ SHF.L.U32 R11, R7.reuse, 0x3, RZ ;
/*2d60*/ IMAD.MOV.U32 R88, RZ, RZ, R6.reuse ;
/*2d70*/ SHF.L.U64.HI R3, R7, 0x3, R6 ;
/*2d80*/ MOV R86, R7.reuse ;
/*2d90*/ IMAD R5, R13, R7.reuse, RZ ;
/*2da0*/ SHF.L.U64.HI R9, R12.reuse, 0x3, R13 ;
/*2db0*/ IMAD.WIDE.U32 R18, R12.reuse, R7, R18 ;
/*2dc0*/ SHF.L.U32 R13, R12, 0x3, RZ ;
/*2dd0*/ IMAD R5, R12, R6, R5 ;
/*2de0*/ IMAD.WIDE.U32 R38, R13, R32, RZ ;
/*2df0*/ IMAD.IADD R5, R19, 0x1, R5 ;
/*2e00*/ LEA R8, P0, R18, R16, 0x3 ;
/*2e10*/ IADD3 R10, P1, R14, R11.reuse, RZ ;
/*2e20*/ IMAD R14, R9, R32, RZ ;
/*2e30*/ IADD3 R11, P2, R80, R11, RZ ;
/*2e40*/ LEA.HI.X R12, R18, R17, R5, 0x3, P0 ;
/*2e50*/ IMAD.X R33, R15, 0x1, R3, P1 ;
/*2e60*/ IADD3.X R80, R81, R3, RZ, P2, !PT ;
/*2e70*/ CS2R R16, SRZ ;
/*2e80*/ SHF.R.S32.HI R81, RZ, 0x1f, R32 ;
/*2e90*/ CS2R R18, SRZ ;
/*2ea0*/ IADD3 R83, P0, R7, 0x1, RZ ;
/*2eb0*/ CS2R R2, SRZ ;
/*2ec0*/ SHF.L.U64.HI R84, R32, 0x3, R81 ;
/*2ed0*/ IMAD R85, R81, R13, R14 ;
/*2ee0*/ IADD3.X R90, RZ, R6, RZ, P0, !PT ;
/*2ef0*/ IADD3 R85, R39, R85, RZ ;
.L_x_657:
/*2f00*/ ISETP.GE.U32.AND P1, PT, R86, R83, PT ;
/*2f10*/ BSSY B1, `(.L_x_650) ;
/*2f20*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x80, 0x0 ;
/*2f30*/ IMAD.MOV.U32 R62, RZ, RZ, R11 ;
/*2f40*/ ISETP.GE.U32.AND.EX P1, PT, R88, R90, PT, P1 ;
/*2f50*/ IMAD.MOV.U32 R63, RZ, RZ, R80 ;
/*2f60*/ MOV R56, R10 ;
/*2f70*/ IMAD.MOV.U32 R57, RZ, RZ, R33 ;
/*2f80*/ MOV R52, R8 ;
/*2f90*/ IMAD.MOV.U32 R53, RZ, RZ, R12 ;
/*2fa0*/ MOV R92, R86 ;
/*2fb0*/ IMAD.MOV.U32 R5, RZ, RZ, R88 ;
/*2fc0*/ @!P1 BRA `(.L_x_651) ;
/*2fd0*/ LD.E.64 R44, [R62.64] ;
/*2fe0*/ LEA R46, P0, R44, R34, 0x3 ;
/*2ff0*/ LEA.HI.X R47, R44, R35, R45, 0x3, P0 ;
/*3000*/ LD.E.64 R46, [R46.64] ;
/*3010*/ IMAD R15, R45, 0x18, RZ ;
/*3020*/ IMAD.WIDE.U32 R44, R44, 0x18, R36 ;
/*3030*/ IMAD.IADD R45, R45, 0x1, R15 ;
/*3040*/ IADD3 R92, P1, R92, 0x1, RZ ;
/*3050*/ ST.E.64 [R56.64], R46 ;
/*3060*/ LD.E.64 R42, [R52.64] ;
/*3070*/ LD.E.64 R40, [R44.64] ;
/*3080*/ LD.E.64 R20, [R44.64+0x8] ;
/*3090*/ LD.E.64 R14, [R44.64+0x10] ;
/*30a0*/ IMAD.X R5, RZ, RZ, R5, P1 ;
/*30b0*/ IADD3 R52, P1, R52, R13, RZ ;
/*30c0*/ IADD3 R56, P2, R56, 0x8, RZ ;
/*30d0*/ IADD3 R62, P3, R62, 0x8, RZ ;
/*30e0*/ IMAD.X R53, R53, 0x1, R9, P1 ;
/*30f0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*3100*/ IMAD.X R57, RZ, RZ, R57, P2 ;
/*3110*/ IMAD.X R63, RZ, RZ, R63, P3 ;
/*3120*/ DFMA R16, R42, R40, R16 ;
/*3130*/ DFMA R18, R42, R20, R18 ;
/*3140*/ DFMA R2, R42, R14, R2 ;
.L_x_651:
/*3150*/ BSYNC B1 ;
.L_x_650:
/*3160*/ IADD3 R14, P3, R83.reuse, -R92.reuse, RZ ;
/*3170*/ BSSY B1, `(.L_x_652) ;
/*3180*/ ISETP.GT.U32.AND P2, PT, R83, R92, PT ;
/*3190*/ ISETP.LE.U32.AND P1, PT, R14, 0x3, PT ;
/*31a0*/ IMAD.X R14, R90.reuse, 0x1, ~R5, P3 ;
/*31b0*/ ISETP.GT.U32.AND.EX P2, PT, R90, R5, PT, P2 ;
/*31c0*/ ISETP.LE.U32.OR.EX P1, PT, R14, RZ, !P2, P1 ;
/*31d0*/ @P1 BRA `(.L_x_653) ;
/*31e0*/ IADD3 R89, P1, R83, -0x3, RZ ;
/*31f0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*3200*/ IADD3.X R87, R90, -0x1, RZ, P1, !PT ;
.L_x_654:
/*3210*/ IMAD.MOV.U32 R14, RZ, RZ, R62 ;
/*3220*/ IMAD.MOV.U32 R15, RZ, RZ, R63 ;
/*3230*/ LD.E.64 R20, [R14.64] ;
/*3240*/ LEA R40, P1, R20, R34, 0x3 ;
/*3250*/ LEA.HI.X R41, R20, R35, R21, 0x3, P1 ;
/*3260*/ LD.E.64 R40, [R40.64] ;
/*3270*/ IMAD.MOV.U32 R44, RZ, RZ, R56 ;
/*3280*/ MOV R45, R57 ;
/*3290*/ ST.E.64 [R44.64], R40 ;
/*32a0*/ LD.E.64 R42, [R14.64+0x8] ;
/*32b0*/ IMAD R21, R21, 0x18, RZ ;
/*32c0*/ IMAD.WIDE.U32 R48, R20, 0x18, R36 ;
/*32d0*/ LD.E.64 R64, [R52.64] ;
/*32e0*/ LEA R46, P1, R42, R34, 0x3 ;
/*32f0*/ LEA.HI.X R47, R42, R35, R43, 0x3, P1 ;
/*3300*/ LD.E.64 R46, [R46.64] ;
/*3310*/ IMAD.IADD R49, R49, 0x1, R21 ;
/*3320*/ LD.E.64 R70, [R48.64] ;
/*3330*/ LD.E.64 R68, [R48.64+0x8] ;
/*3340*/ LD.E.64 R66, [R48.64+0x10] ;
/*3350*/ ST.E.64 [R44.64+0x8], R46 ;
/*3360*/ LD.E.64 R78, [R14.64+0x10] ;
/*3370*/ IMAD R51, R43, 0x18, RZ ;
/*3380*/ LEA R40, P1, R78, R34, 0x3 ;
/*3390*/ LEA.HI.X R41, R78, R35, R79, 0x3, P1 ;
/*33a0*/ LD.E.64 R40, [R40.64] ;
/*33b0*/ IADD3 R20, P1, R52, R13, RZ ;
/*33c0*/ IMAD.WIDE.U32 R42, R42, 0x18, R36 ;
/*33d0*/ IMAD.X R21, R53, 0x1, R9, P1 ;
/*33e0*/ IMAD.IADD R43, R43, 0x1, R51 ;
/*33f0*/ LD.E.64 R56, [R20.64] ;
/*3400*/ LD.E.64 R62, [R42.64] ;
/*3410*/ LD.E.64 R60, [R42.64+0x8] ;
/*3420*/ LD.E.64 R58, [R42.64+0x10] ;
/*3430*/ ST.E.64 [R44.64+0x10], R40 ;
/*3440*/ LD.E.64 R72, [R14.64+0x18] ;
/*3450*/ IMAD R47, R79, 0x18, RZ ;
/*3460*/ IMAD.WIDE.U32 R78, R78, 0x18, R36 ;
/*3470*/ LEA R76, P1, R72, R34, 0x3 ;
/*3480*/ LEA.HI.X R77, R72, R35, R73, 0x3, P1 ;
/*3490*/ LD.E.64 R76, [R76.64] ;
/*34a0*/ IMAD R49, R73, 0x18, RZ ;
/*34b0*/ IADD3 R48, P1, R20, R13, RZ ;
/*34c0*/ IMAD.WIDE.U32 R72, R72, 0x18, R36 ;
/*34d0*/ IADD3 R79, R79, R47, RZ ;
/*34e0*/ IMAD.IADD R73, R73, 0x1, R49 ;
/*34f0*/ IMAD.X R49, R21, 0x1, R9, P1 ;
/*3500*/ IADD3 R40, P1, R48, R13, RZ ;
/*3510*/ LD.E.64 R52, [R78.64+0x8] ;
/*3520*/ IADD3.X R41, R49, R9, RZ, P1, !PT ;
/*3530*/ LD.E.64 R54, [R78.64] ;
/*3540*/ LD.E.64 R48, [R48.64] ;
/*3550*/ LD.E.64 R50, [R78.64+0x10] ;
/*3560*/ ST.E.64 [R44.64+0x18], R76 ;
/*3570*/ LD.E.64 R20, [R40.64] ;
/*3580*/ LD.E.64 R46, [R72.64+0x8] ;
/*3590*/ LD.E.64 R42, [R72.64] ;
/*35a0*/ LD.E.64 R74, [R72.64+0x10] ;
/*35b0*/ IADD3 R92, P1, R92, 0x4, RZ ;
/*35c0*/ DFMA R68, R64, R68, R18 ;
/*35d0*/ DFMA R70, R64.reuse, R70, R16 ;
/*35e0*/ IMAD.X R5, RZ, RZ, R5, P1 ;
/*35f0*/ ISETP.GE.U32.AND P1, PT, R92, R89, PT ;
/*3600*/ DFMA R66, R64, R66, R2 ;
/*3610*/ DFMA R60, R56, R60, R68 ;
/*3620*/ ISETP.GE.U32.AND.EX P1, PT, R5, R87, PT, P1 ;
/*3630*/ DFMA R62, R56, R62, R70 ;
/*3640*/ DFMA R58, R56, R58, R66 ;
/*3650*/ IADD3 R56, P2, R44, 0x20, RZ ;
/*3660*/ DFMA R52, R48, R52, R60 ;
/*3670*/ DFMA R54, R48, R54, R62 ;
/*3680*/ IMAD.X R57, RZ, RZ, R45, P2 ;
/*3690*/ DFMA R50, R48, R50, R58 ;
/*36a0*/ IADD3 R62, P2, R14, 0x20, RZ ;
/*36b0*/ IADD3.X R63, RZ, R15, RZ, P2, !PT ;
/*36c0*/ DFMA R18, R20.reuse, R46, R52 ;
/*36d0*/ IADD3 R52, P3, R40, R13, RZ ;
/*36e0*/ DFMA R16, R20, R42, R54 ;
/*36f0*/ DFMA R2, R20, R74, R50 ;
/*3700*/ IMAD.X R53, R41, 0x1, R9, P3 ;
/*3710*/ @!P1 BRA `(.L_x_654) ;
.L_x_653:
/*3720*/ BSYNC B1 ;
.L_x_652:
/*3730*/ IADD3 R14, P3, R83.reuse, -R92.reuse, RZ ;
/*3740*/ BSSY B1, `(.L_x_655) ;
/*3750*/ ISETP.GT.U32.AND P2, PT, R83, R92, PT ;
/*3760*/ ISETP.LE.U32.AND P1, PT, R14, 0x1, PT ;
/*3770*/ IMAD.X R14, R90.reuse, 0x1, ~R5, P3 ;
/*3780*/ ISETP.GT.U32.AND.EX P2, PT, R90, R5, PT, P2 ;
/*3790*/ ISETP.LE.U32.OR.EX P1, PT, R14, RZ, !P2, P1 ;
/*37a0*/ @P1 BRA `(.L_x_656) ;
/*37b0*/ LD.E.64 R60, [R62.64] ;
/*37c0*/ LEA R20, P0, R60, R34, 0x3 ;
/*37d0*/ LEA.HI.X R21, R60, R35, R61, 0x3, P0 ;
/*37e0*/ LD.E.64 R20, [R20.64] ;
/*37f0*/ ST.E.64 [R56.64], R20 ;
/*3800*/ LD.E.64 R54, [R62.64+0x8] ;
/*3810*/ IMAD R15, R61, 0x18, RZ ;
/*3820*/ IMAD.WIDE.U32 R60, R60, 0x18, R36 ;
/*3830*/ LEA R58, P0, R54, R34, 0x3 ;
/*3840*/ LEA.HI.X R59, R54, R35, R55, 0x3, P0 ;
/*3850*/ LD.E.64 R58, [R58.64] ;
/*3860*/ IMAD R41, R55, 0x18, RZ ;
/*3870*/ IADD3 R14, P0, R52, R13, RZ ;
/*3880*/ IMAD.WIDE.U32 R54, R54, 0x18, R36 ;
/*3890*/ IMAD.IADD R61, R61, 0x1, R15 ;
/*38a0*/ IMAD.IADD R55, R55, 0x1, R41 ;
/*38b0*/ IMAD.X R15, R53, 0x1, R9, P0 ;
/*38c0*/ LD.E.64 R50, [R60.64] ;
/*38d0*/ LD.E.64 R52, [R52.64] ;
/*38e0*/ LD.E.64 R20, [R60.64+0x8] ;
/*38f0*/ LD.E.64 R40, [R60.64+0x10] ;
/*3900*/ ST.E.64 [R56.64+0x8], R58 ;
/*3910*/ LD.E.64 R42, [R14.64] ;
/*3920*/ LD.E.64 R44, [R54.64] ;
/*3930*/ LD.E.64 R46, [R54.64+0x8] ;
/*3940*/ LD.E.64 R48, [R54.64+0x10] ;
/*3950*/ IADD3 R92, P1, R92, 0x2, RZ ;
/*3960*/ DFMA R16, R52, R50, R16 ;
/*3970*/ IADD3 R56, P2, R56, 0x10, RZ ;
/*3980*/ DFMA R18, R52, R20, R18 ;
/*3990*/ IADD3.X R5, RZ, R5, RZ, P1, !PT ;
/*39a0*/ DFMA R2, R52, R40, R2 ;
/*39b0*/ IADD3 R62, P3, R62, 0x10, RZ ;
/*39c0*/ IADD3 R52, P1, R14, R13, RZ ;
/*39d0*/ IMAD.X R57, RZ, RZ, R57, P2 ;
/*39e0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ;
/*39f0*/ IADD3.X R63, RZ, R63, RZ, P3, !PT ;
/*3a00*/ IMAD.X R53, R15, 0x1, R9, P1 ;
/*3a10*/ DFMA R16, R42, R44, R16 ;
/*3a20*/ DFMA R18, R42, R46, R18 ;
/*3a30*/ DFMA R2, R42, R48, R2 ;
.L_x_656:
/*3a40*/ BSYNC B1 ;
.L_x_655:
/*3a50*/ ISETP.LT.U32.AND P1, PT, R92, R83, PT ;
/*3a60*/ ISETP.LT.U32.OR.EX P0, PT, R5, R90, P0, P1 ;
/*3a70*/ @P0 LD.E.64 R14, [R62.64] ;
/*3a80*/ @P0 LEA R20, P1, R14, R34, 0x3 ;
/*3a90*/ @P0 LEA.HI.X R21, R14, R35, R15, 0x3, P1 ;
/*3aa0*/ @P0 LD.E.64 R20, [R20.64] ;
/*3ab0*/ @P0 IMAD R5, R15, 0x18, RZ ;
/*3ac0*/ @P0 IMAD.WIDE.U32 R14, R14, 0x18, R36 ;
/*3ad0*/ @P0 IMAD.IADD R15, R15, 0x1, R5 ;
/*3ae0*/ IADD3 R7, P1, R32, R7, RZ ;
/*3af0*/ IMAD.X R5, R81, 0x1, R6, P1 ;
/*3b00*/ ISETP.LT.U32.AND P1, PT, R7, R0, PT ;
/*3b10*/ @P0 ST.E.64 [R56.64], R20 ;
/*3b20*/ @P0 LD.E.64 R40, [R52.64] ;
/*3b30*/ @P0 LD.E.64 R42, [R14.64] ;
/*3b40*/ @P0 LD.E.64 R44, [R14.64+0x8] ;
/*3b50*/ @P0 LD.E.64 R46, [R14.64+0x10] ;
/*3b60*/ ISETP.LT.U32.AND.EX P1, PT, R5, R4, PT, P1 ;
/*3b70*/ IADD3 R86, P3, R32, R86, RZ ;
/*3b80*/ SEL R7, R7, R0, P1 ;
/*3b90*/ SEL R6, R5, R4, P1 ;
/*3ba0*/ IMAD.X R88, R81, 0x1, R88, P3 ;
/*3bb0*/ IADD3 R83, P2, R7, 0x1, RZ ;
/*3bc0*/ IADD3 R8, P3, R8, R38, RZ ;
/*3bd0*/ IADD3 R11, P1, R11, R82.reuse, RZ ;
/*3be0*/ IMAD.X R90, RZ, RZ, R6, P2 ;
/*3bf0*/ IADD3 R10, P2, R10, R82, RZ ;
/*3c00*/ IMAD.X R12, R12, 0x1, R85, P3 ;
/*3c10*/ IMAD.X R80, R80, 0x1, R84.reuse, P1 ;
/*3c20*/ IMAD.X R33, R33, 0x1, R84, P2 ;
/*3c30*/ @P0 DFMA R16, R40, R42, R16 ;
/*3c40*/ @P0 DFMA R18, R40, R44, R18 ;
/*3c50*/ @P0 DFMA R2, R40, R46, R2 ;
/*3c60*/ ISETP.GE.U32.AND P0, PT, R86, R83, PT ;
/*3c70*/ ISETP.GE.U32.AND.EX P0, PT, R88, R90, PT, P0 ;
/*3c80*/ @!P0 BRA `(.L_x_657) ;
.L_x_649:
/*3c90*/ BSYNC B0 ;
.L_x_648:
/*3ca0*/ STL.64 [R1], R16 ; /* Region purpose: spill the three 64-bit partial sums (R16:R17, R18:R19, R2:R3, the DFMA accumulators of the loop above) to local memory at R1, R1+0x8 and R1+0x10, then combine them across lanes with SHFL.DOWN and DADD. This line stores the first sum. */
/*3cb0*/ ISETP.NE.AND P0, PT, R32, 0x1, PT ; /* P0 = (R32 != 1). R32 is set outside this view - presumably the number of participating threads, TODO confirm */
/*3cc0*/ BSSY B8, `(.L_x_647) ;
/*3cd0*/ IMAD.MOV.U32 R34, RZ, RZ, R2 ; /* R34:R35 becomes the working copy of the third sum (R2:R3) */
/*3ce0*/ STL.64 [R1+0x8], R18 ;
/*3cf0*/ MOV R35, R3 ;
/*3d00*/ STL.64 [R1+0x10], R2 ;
/*3d10*/ @!P0 BRA `(.L_x_658) ; /* R32 == 1: skip the cross-lane reduction and go straight to the shared-memory accumulate at .L_x_658 */
/*3d20*/ IADD3 R40, R32.reuse, 0x1f, RZ ; /* R40 = R32 + 31 */
/*3d30*/ BSSY B7, `(.L_x_659) ;
/*3d40*/ LOP3.LUT P0, R42, R32, 0x1f, RZ, 0xc0, !PT ; /* R42 = R32 & 31. P0 presumably flags a non-zero result - confirm against the LOP3 predicate-output rules */
/*3d50*/ SHF.R.U32.HI R41, RZ, 0x5, R40 ; /* R41 = (R32 + 31) >> 5, i.e. R32 / 32 rounded up */
/*3d60*/ IADD3 R3, R41, -0x1, RZ ;
/*3d70*/ ISETP.GE.U32.AND P1, PT, R26, R3, PT ; /* P1 = (R26 >= R41 - 1). R26 comes from outside this view - presumably a warp index, TODO confirm */
/*3d80*/ @P0 BRA P1, `(.L_x_660) ; /* Presumably taken when both P0 and P1 hold (last, partially filled group of 32 lanes): use the variable-offset loop at .L_x_660 */
/*3d90*/ BRA.DIV ~URZ, `(.L_x_661) ; /* Divergent-warp fallback. The target is outside this view */
/*3da0*/ SHFL.DOWN PT, R3, R17, 0x10, 0x1f ; /* Offset-16 step. Each 64-bit value travels as two 32-bit shuffles (high word, then low word) */
/*3db0*/ SHFL.DOWN PT, R2, R16, 0x10, 0x1f ;
/*3dc0*/ SHFL.DOWN PT, R5, R19, 0x10, 0x1f ;
/*3dd0*/ SHFL.DOWN PT, R4, R18, 0x10, 0x1f ;
/*3de0*/ SHFL.DOWN PT, R7, R35, 0x10, 0x1f ;
/*3df0*/ SHFL.DOWN PT, R6, R34, 0x10, 0x1f ;
/*3e00*/ DADD R2, R16, R2 ; /* own value + shuffled-in value, first sum */
/*3e10*/ SHFL.DOWN PT, R9, R3, 0x8, 0x1f ; /* Offset-8 step starts here, interleaved with the remaining offset-16 adds */
/*3e20*/ DADD R4, R18, R4 ;
/*3e30*/ SHFL.DOWN PT, R8, R2, 0x8, 0x1f ;
/*3e40*/ SHFL.DOWN PT, R11, R5, 0x8, 0x1f ;
/*3e50*/ DADD R6, R6, R34 ;
/*3e60*/ SHFL.DOWN PT, R10, R4, 0x8, 0x1f ;
/*3e70*/ SHFL.DOWN PT, R13, R7, 0x8, 0x1f ;
/*3e80*/ SHFL.DOWN PT, R12, R6, 0x8, 0x1f ;
/*3e90*/ DADD R8, R2, R8 ;
/*3ea0*/ SHFL.DOWN PT, R15, R9, 0x4, 0x1f ; /* Offset-4 step */
/*3eb0*/ DADD R10, R4, R10 ;
/*3ec0*/ SHFL.DOWN PT, R14, R8, 0x4, 0x1f ;
/*3ed0*/ SHFL.DOWN PT, R17, R11, 0x4, 0x1f ;
/*3ee0*/ DADD R12, R6, R12 ;
/*3ef0*/ SHFL.DOWN PT, R16, R10, 0x4, 0x1f ;
/*3f00*/ SHFL.DOWN PT, R3, R13, 0x4, 0x1f ;
/*3f10*/ SHFL.DOWN PT, R2, R12, 0x4, 0x1f ;
/*3f20*/ DADD R14, R8, R14 ;
/*3f30*/ SHFL.DOWN PT, R7, R15, 0x2, 0x1f ; /* Offset-2 step */
/*3f40*/ DADD R4, R10, R16 ;
/*3f50*/ SHFL.DOWN PT, R6, R14, 0x2, 0x1f ;
/*3f60*/ SHFL.DOWN PT, R19, R5, 0x2, 0x1f ;
/*3f70*/ DADD R2, R12, R2 ;
/*3f80*/ SHFL.DOWN PT, R18, R4, 0x2, 0x1f ;
/*3f90*/ SHFL.DOWN PT, R35, R3, 0x2, 0x1f ;
/*3fa0*/ SHFL.DOWN PT, R34, R2, 0x2, 0x1f ;
/*3fb0*/ DADD R38, R14, R6 ;
/*3fc0*/ SHFL.DOWN PT, R17, R39, 0x1, 0x1f ; /* Offset-1 step */
/*3fd0*/ DADD R18, R4, R18 ;
/*3fe0*/ SHFL.DOWN PT, R16, R38, 0x1, 0x1f ;
/*3ff0*/ SHFL.DOWN PT, R37, R19, 0x1, 0x1f ;
/*4000*/ DADD R34, R2, R34 ;
/*4010*/ SHFL.DOWN PT, R36, R18, 0x1, 0x1f ;
/*4020*/ SHFL.DOWN PT, R29, R35, 0x1, 0x1f ;
/*4030*/ SHFL.DOWN PT, R4, R34, 0x1, 0x1f ;
.L_x_767:
/*4040*/ MOV R5, R29 ; /* R29 received the high word of the third value in the offset-1 shuffle. Pair it with the low word in R4 */
/*4050*/ DADD R16, R16, R38 ; /* Final offset-1 adds. Results end up in R16:R17, R18:R19 and R34:R35 */
/*4060*/ DADD R18, R36, R18 ;
/*4070*/ STL.64 [R1], R16 ; /* Write the reduced values back to the same three local-memory slots */
/*4080*/ DADD R34, R4, R34 ;
/*4090*/ STL.64 [R1+0x8], R18 ;
/*40a0*/ STL.64 [R1+0x10], R34 ;
/*40b0*/ BRA `(.L_x_662) ;
.L_x_660:
/*40c0*/ ISETP.GE.U32.AND P0, PT, R42, 0x2, PT ; /* Region purpose: variable-offset shuffle reduction of the same three 64-bit values (R16:R17, R18:R19, R34:R35). R42 = R32 & 31 was computed at address 3d40. P0 = (R42 >= 2). */
/*40d0*/ @!P0 BRA `(.L_x_662) ; /* R42 < 2: nothing to combine */
/*40e0*/ BSSY B6, `(.L_x_663) ;
/*40f0*/ SHF.R.U32.HI R29, RZ, 0x1, R42 ; /* Initial shuffle offset R29 = R42 >> 1 */
.L_x_665:
/*4100*/ BRA.DIV ~URZ, `(.L_x_664) ; /* Divergent-warp fallback. The target is outside this view */
/*4110*/ SHFL.DOWN PT, R39, R17, R29, 0x1f ; /* Fetch all three values from the lane R29 positions further down, as high/low 32-bit halves */
/*4120*/ SHFL.DOWN PT, R38, R16, R29, 0x1f ;
/*4130*/ SHFL.DOWN PT, R37, R19, R29, 0x1f ;
/*4140*/ SHFL.DOWN PT, R36, R18, R29, 0x1f ;
/*4150*/ SHFL.DOWN PT, R33, R35, R29, 0x1f ;
/*4160*/ SHFL.DOWN PT, R2, R34, R29, 0x1f ;
.L_x_768:
/*4170*/ LOP3.LUT R0, R29, 0xffff, RZ, 0xc0, !PT ; /* R0 = current offset, low 16 bits */
/*4180*/ IMAD.MOV.U32 R3, RZ, RZ, R33 ; /* R2:R3 = shuffled-in third value */
/*4190*/ IADD3 R29, R42, 0x1, RZ ;
/*41a0*/ ISETP.GE.U32.AND P0, PT, R25, R0, PT ; /* P0 = (R25 >= offset). R25 comes from outside this view - presumably the lane id, TODO confirm */
/*41b0*/ SHF.R.U32.HI R0, RZ, 0x1, R29.reuse ; /* next R42 = (R42 + 1) >> 1 */
/*41c0*/ SHF.R.U32.HI R29, RZ, 0x2, R29 ; /* next offset = (R42 + 1) >> 2 */
/*41d0*/ @!P0 DADD R38, R38, R16 ; /* Lanes with R25 < offset add their own value to the shuffled-in one. The other lanes keep only the shuffled-in value */
/*41e0*/ @!P0 DADD R36, R36, R18 ;
/*41f0*/ @!P0 DADD R2, R2, R34 ;
/*4200*/ ISETP.GT.U32.AND P0, PT, R42, 0x2, PT ; /* Loop again while the pre-update R42 was > 2 */
/*4210*/ IMAD.MOV.U32 R42, RZ, RZ, R0 ;
/*4220*/ MOV R16, R38 ; /* Copy the step results back into the working registers for the next iteration */
/*4230*/ IMAD.MOV.U32 R17, RZ, RZ, R39 ;
/*4240*/ IMAD.MOV.U32 R18, RZ, RZ, R36 ;
/*4250*/ IMAD.MOV.U32 R19, RZ, RZ, R37 ;
/*4260*/ IMAD.MOV.U32 R34, RZ, RZ, R2 ;
/*4270*/ MOV R35, R3 ;
/*4280*/ @P0 BRA `(.L_x_665) ;
/*4290*/ BSYNC B6 ;
.L_x_663:
/*42a0*/ STL.64 [R1+0x10], R2 ; /* Store the reduced values to the three local-memory slots and leave the first two in R16:R17 and R18:R19 */
/*42b0*/ IMAD.MOV.U32 R18, RZ, RZ, R36 ;
/*42c0*/ MOV R16, R38 ;
/*42d0*/ IMAD.MOV.U32 R19, RZ, RZ, R37 ;
/*42e0*/ STL.64 [R1], R38 ;
/*42f0*/ MOV R17, R39 ;
/*4300*/ STL.64 [R1+0x8], R36 ;
.L_x_662:
/*4310*/ BSYNC B7 ; /* Join point of the fixed-offset and variable-offset reduction paths */
.L_x_659:
/*4320*/ ISETP.GE.U32.AND P0, PT, R32, 0x21, PT ; /* Region purpose: when R32 >= 33, pass the 24 bytes at local [R1 .. R1+0x17] between threads through __openmp_nvptx_data_transfer_temporary_storage, one 32-bit word per loop iteration, with a barrier before and after each publish. */
/*4330*/ @!P0 BRA `(.L_x_666) ; /* R32 <= 32: skip this stage */
/*4340*/ BSSY B7, `(.L_x_667) ;
/*4350*/ MOV R2, RZ ; /* R2 = byte offset into the local buffer, starts at 0 */
.L_x_680:
/*4360*/ LDS R0, [`(IsSPMDMode)] ;
/*4370*/ YIELD (*"RELOCATOR OPCODE,YIELD,280"*);
/*4380*/ BSSY B6, `(.L_x_668) ;
/*4390*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*43a0*/ @P0 BRA `(.L_x_669) ; /* IsSPMDMode != 0: plain WARPSYNC + BAR.SYNC 0x0 at .L_x_669 */
/*43b0*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*43c0*/ ISETP.NE.AND P1, PT, R22, UR38, PT ; /* R22 and UR38 come from outside this view - presumably thread id and main-thread id, TODO confirm */
/*43d0*/ ISETP.NE.OR P0, PT, R0, RZ, P1 ; /* P0 = (TeamState_$_0 != 0) or (R22 != UR38) */
/*43e0*/ @P0 BRA `(.L_x_670) ;
/*43f0*/ @!PT LDS RZ, [RZ] ; /* Predicated on !PT, so these four loads never execute */
/*4400*/ @!PT LDS RZ, [RZ] ;
/*4410*/ @!PT LDS RZ, [RZ] ;
/*4420*/ @!PT LDS RZ, [RZ] ;
/*4430*/ MEMBAR.SC.GPU ; /* This branch only fences. No BAR.SYNC is executed on it */
/*4440*/ ERRBAR;
/*4450*/ CCTL.IVALL ;
/*4460*/ BRA `(.L_x_671) ;
.L_x_670:
/*4470*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*4480*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*4490*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*44a0*/ SEL R5, R30, R3, !P1 ; /* R5 = TeamState_$_2 if non-zero, else R30 */
/*44b0*/ SEL R5, R5, 0x1, !P0 ; /* ... but 1 when TeamState_$_0 != 1 */
/*44c0*/ BRA.DIV ~URZ, `(.L_x_672) ;
/*44d0*/ BAR.SYNC 0x7, R5 ; /* Barrier 7 with R5 as the thread-count operand */
/*44e0*/ BRA `(.L_x_671) ;
.L_x_669:
/*44f0*/ WARPSYNC 0xffffffff ;
/*4500*/ BAR.SYNC 0x0 ;
.L_x_671:
/*4510*/ BSYNC B6 ;
.L_x_668:
/*4520*/ ISETP.NE.AND P0, PT, R31, RZ, PT ; /* R31 comes from outside this view - presumably the lane id, TODO confirm */
/*4530*/ BSSY B0, `(.L_x_673) ;
/*4540*/ IADD3 R29, R1, R2, RZ ; /* R29 = address of the current word in the local buffer */
/*4550*/ @P0 BRA `(.L_x_674) ; /* Only threads with R31 == 0 publish */
/*4560*/ LDL R3, [R29] ;
/*4570*/ STS [R28+`(__openmp_nvptx_data_transfer_temporary_storage)], R3 ; /* R28 is a slot offset computed outside this view */
.L_x_674:
/*4580*/ BSYNC B0 ;
.L_x_673:
/*4590*/ LDS R0, [`(IsSPMDMode)] ; /* Second barrier, same three-way structure as the first one in this loop */
/*45a0*/ BSSY B6, `(.L_x_675) ;
/*45b0*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*45c0*/ @P0 BRA `(.L_x_676) ;
/*45d0*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*45e0*/ ISETP.NE.AND P1, PT, R22, UR38, PT ;
/*45f0*/ ISETP.NE.OR P0, PT, R0, RZ, P1 ;
/*4600*/ @P0 BRA `(.L_x_677) ;
/*4610*/ @!PT LDS RZ, [RZ] ;
/*4620*/ @!PT LDS RZ, [RZ] ;
/*4630*/ @!PT LDS RZ, [RZ] ;
/*4640*/ @!PT LDS RZ, [RZ] ;
/*4650*/ MEMBAR.SC.GPU ;
/*4660*/ ERRBAR;
/*4670*/ CCTL.IVALL ;
/*4680*/ BRA `(.L_x_678) ;
.L_x_677:
/*4690*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*46a0*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*46b0*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*46c0*/ SEL R5, R30, R3, !P1 ;
/*46d0*/ SEL R5, R5, 0x1, !P0 ;
/*46e0*/ BRA.DIV ~URZ, `(.L_x_679) ;
/*46f0*/ BAR.SYNC 0x7, R5 ;
/*4700*/ BRA `(.L_x_678) ;
.L_x_676:
/*4710*/ WARPSYNC 0xffffffff ;
/*4720*/ BAR.SYNC 0x0 ;
.L_x_678:
/*4730*/ BSYNC B6 ;
.L_x_675:
/*4740*/ ISETP.GE.U32.AND P0, PT, R22, R41, PT ; /* Threads with R22 < R41 (R41 = R32 / 32 rounded up) read a word back */
/*4750*/ IADD3 R2, R2, 0x4, RZ ; /* Advance to the next 32-bit word */
/*4760*/ @!P0 LDS R0, [R27+`(__openmp_nvptx_data_transfer_temporary_storage)] ; /* R27 is a slot offset computed outside this view */
/*4770*/ @!P0 STL [R29], R0 ; /* The received word overwrites the same position of the local buffer */
/*4780*/ ISETP.NE.AND P0, PT, R2, 0x18, PT ; /* 0x18 = 24 bytes = three doubles, so six iterations */
/*4790*/ @P0 BRA `(.L_x_680) ;
/*47a0*/ BSYNC B7 ;
.L_x_667:
/*47b0*/ ISETP.GE.U32.AND P0, PT, R40, 0x40, PT ; /* Region purpose: second-stage shuffle reduction over the values received through shared storage, done by threads with R22 <= 31, followed by the test that lets only R22 == 0 continue to the shared-memory accumulate. R40 = R32 + 31 from address 3d20. */
/*47c0*/ BSSY B7, `(.L_x_666) ;
/*47d0*/ ISETP.GT.U32.OR P0, PT, R22, 0x1f, !P0 ; /* P0 = (R22 > 31) or (R40 < 64) */
/*47e0*/ @P0 BRA `(.L_x_681) ;
/*47f0*/ LDL.64 R16, [R1+0x10] ; /* Note the register assignment here: R16:R17 = slot 2, R18:R19 = slot 0, R32:R33 = slot 1 */
/*4800*/ LDL.64 R18, [R1] ;
/*4810*/ LDL.64 R32, [R1+0x8] ;
/*4820*/ BSSY B6, `(.L_x_682) ;
/*4830*/ SHF.R.U32.HI R2, RZ, 0x6, R40 ; /* Initial offset R2 = R40 >> 6 */
.L_x_684:
/*4840*/ PRMT R39, R2, 0x9910, RZ ; /* Appears to sign-extend the low 16 bits of R2 into R39 - confirm against the PRMT selector encoding */
/*4850*/ BRA.DIV ~URZ, `(.L_x_683) ; /* Divergent-warp fallback. The target is outside this view */
/*4860*/ SHFL.DOWN PT, R37, R19, R39, 0x1f ;
/*4870*/ SHFL.DOWN PT, R36, R18, R39, 0x1f ;
/*4880*/ SHFL.DOWN PT, R35, R33, R39, 0x1f ;
/*4890*/ SHFL.DOWN PT, R34, R32, R39, 0x1f ;
/*48a0*/ SHFL.DOWN PT, R29, R17, R39, 0x1f ;
/*48b0*/ SHFL.DOWN PT, R4, R16, R39, 0x1f ;
.L_x_769:
/*48c0*/ LOP3.LUT R2, R2, 0xffff, RZ, 0xc0, !PT ;
/*48d0*/ IMAD.MOV.U32 R5, RZ, RZ, R29 ; /* R4:R5 = shuffled-in slot-2 value */
/*48e0*/ LOP3.LUT R3, R22, 0xffff, RZ, 0xc0, !PT ;
/*48f0*/ ISETP.GE.U32.AND P0, PT, R3, R2, PT ; /* P0 = ((R22 & 0xffff) >= offset) */
/*4900*/ IADD3 R2, R41, 0x1, RZ ;
/*4910*/ SHF.R.U32.HI R0, RZ, 0x1, R2.reuse ; /* next R41 = (R41 + 1) >> 1 */
/*4920*/ SHF.R.U32.HI R2, RZ, 0x2, R2 ; /* next offset = (R41 + 1) >> 2 */
/*4930*/ @!P0 DADD R36, R36, R18 ; /* Threads below the offset add their own value. The others keep only the shuffled-in value */
/*4940*/ @!P0 DADD R34, R34, R32 ;
/*4950*/ @!P0 DADD R4, R4, R16 ;
/*4960*/ ISETP.GT.U32.AND P0, PT, R41, 0x2, PT ; /* Loop again while the pre-update R41 was > 2 */
/*4970*/ IMAD.MOV.U32 R41, RZ, RZ, R0 ;
/*4980*/ MOV R19, R37 ;
/*4990*/ IMAD.MOV.U32 R18, RZ, RZ, R36 ;
/*49a0*/ IMAD.MOV.U32 R32, RZ, RZ, R34 ;
/*49b0*/ IMAD.MOV.U32 R33, RZ, RZ, R35 ;
/*49c0*/ MOV R16, R4 ;
/*49d0*/ IMAD.MOV.U32 R17, RZ, RZ, R5 ;
/*49e0*/ @P0 BRA `(.L_x_684) ;
/*49f0*/ BSYNC B6 ;
.L_x_682:
/*4a00*/ STL.64 [R1+0x10], R4 ; /* Slot-2 result goes back to local memory. It is reloaded into R34:R35 at address 4a90 */
/*4a10*/ IMAD.MOV.U32 R18, RZ, RZ, R32 ; /* R18:R19 = slot-1 result */
/*4a20*/ MOV R16, R36 ; /* R16:R17 = slot-0 result */
/*4a30*/ IMAD.MOV.U32 R19, RZ, RZ, R33 ;
/*4a40*/ MOV R17, R37 ;
.L_x_681:
/*4a50*/ BSYNC B7 ;
.L_x_666:
/*4a60*/ ISETP.NE.AND P0, PT, R22, RZ, PT ;
/*4a70*/ P2R R29, PR, RZ, 0x1 ; /* Save P0 (R22 != 0) in bit 0 of R29. R29 is tested again at address 4c00 */
/*4a80*/ @P0 BRA `(.L_x_685) ; /* Only the thread with R22 == 0 falls through to the shared-memory accumulate */
/*4a90*/ LDL.64 R34, [R1+0x10] ; /* R34:R35 = third reduced value from local memory */
.L_x_658:
/*4aa0*/ LDS.64 R2, [`($__ratioGradRef_local_shared__2692)] ; /* Region purpose: add the three reduced doubles (R16:R17, R18:R19, R34:R35) into the three 8-byte elements of the shared variable ratioGradRef_local (offsets 0x0, 0x8, 0x10). Within this view it is reached by the R32 == 1 branch at address 3d10 and by the fall-through for R22 == 0. */
/*4ab0*/ LDS.64 R6, [`(($__ratioGradRef_local_shared__2692 + 0x8))] ;
/*4ac0*/ LDS.64 R4, [`(($__ratioGradRef_local_shared__2692 + 0x10))] ;
/*4ad0*/ DADD R2, R2, R16 ; /* element 0 += R16:R17 */
/*4ae0*/ DADD R18, R18, R6 ; /* element 1 += R18:R19 */
/*4af0*/ STS.64 [`($__ratioGradRef_local_shared__2692)], R2 ;
/*4b00*/ DADD R4, R4, R34 ; /* element 2 += R34:R35 */
/*4b10*/ STS.64 [`(($__ratioGradRef_local_shared__2692 + 0x8))], R18 ;
/*4b20*/ STS.64 [`(($__ratioGradRef_local_shared__2692 + 0x10))], R4 ;
.L_x_685:
/*4b30*/ BSYNC B8 ; /* Convergence point for the BSSY B8 issued at address 3cc0. This is not a block-wide barrier */
.L_x_647:
/*4b40*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ; /* TeamState field 0 = 0 */
/*4b50*/ BRA `(.L_x_645) ;
.L_x_646:
/*4b60*/ MOV R2, 0x1 ; /* Region purpose: write three TeamState fields, pass two BAR.SYNC 0x8 barriers, then reset the fields. The branch into this label and the value of R0 are outside this view - presumably the path where other threads execute the parallel region, TODO confirm. */
/*4b70*/ STS [`(_ZN4ompx5state9TeamStateE_$_2)], R0 ;
/*4b80*/ STS [`(_ZN4ompx5state9TeamStateE_$_1)], R2 ; /* field 1 = 1 */
/*4b90*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], R2 ; /* field 0 = 1 */
/*4ba0*/ BAR.SYNC 0x8 ; /* Two consecutive barriers on barrier 8 - presumably release and then rejoin of the other threads, TODO confirm */
/*4bb0*/ BAR.SYNC 0x8 ;
/*4bc0*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ; /* Reset: field 0 = 0, field 1 = 0, field 2 = 1 */
/*4bd0*/ STS [`(_ZN4ompx5state9TeamStateE_$_1)], RZ ;
/*4be0*/ STS [`(_ZN4ompx5state9TeamStateE_$_2)], R2 ;
.L_x_645:
/*4bf0*/ LDS R2, [`($__iw_shared5_$_0__2688)] ; /* Region purpose: copy the three doubles of the shared variable ratioGradRef_local to global memory at c[0x0][0x1a8] + iw * 24, zero the shared c_ratio accumulator, then set up thread counts and TeamState for the next reduction. R2 = iw. */
/*4c00*/ ISETP.NE.AND P0, PT, R29, RZ, PT ; /* R29 holds the saved (R22 != 0) bit from address 4a70 on the path that executed it */
/*4c10*/ IMAD.MOV.U32 R3, RZ, RZ, 0x18 ; /* 0x18 = 24 bytes per iw entry (three doubles) */
/*4c20*/ WARPSYNC 0xffffffff ;
/*4c30*/ LDS.64 R4, [`(($__ratioGradRef_local_shared__2692 + 0x10))] ; /* NOTE(review): in this view the only synchronization between the STS updates of this variable at .L_x_658 and these loads is the WARPSYNC above. The block-wide BAR.SYNC 0x0 comes after the global stores. Confirm whether threads of other warps can reach this point early and store a stale value. */
/*4c40*/ ULDC UR4, c[0x0][0x0] ; /* c[0x0][0x0] is presumably the block size in x - TODO confirm for this architecture */
/*4c50*/ UIADD3 UR4, UR4, -0x20, URZ ;
/*4c60*/ LDS.128 R8, [`($__ratioGradRef_local_shared__2692)] ; /* R8:R9 = element 0, R10:R11 = element 1 */
/*4c70*/ IMAD.U32 R30, RZ, RZ, UR4 ; /* R30 = c[0x0][0x0] - 32 */
/*4c80*/ @!P0 STS.64 [`($__c_ratio_shared8_$_0__2702)], RZ ; /* c_ratio = 0, done only where R29 == 0 */
/*4c90*/ IMAD.WIDE.U32 R2, R2, R3, c[0x0][0x1a8] ; /* R2:R3 = c[0x0][0x1a8] + iw * 24. The constant-bank slot is presumably a pointer kernel argument */
/*4ca0*/ STG.E.64 [R2.64+0x10], R4 ; /* The three stores are unpredicated: every thread that reaches this point performs them */
/*4cb0*/ STG.E.64 [R2.64+0x8], R10 ;
/*4cc0*/ STG.E.64 [R2.64], R8 ;
/*4cd0*/ BAR.SYNC 0x0 ;
/*4ce0*/ LDS R0, [`(IsSPMDMode)] ;
/*4cf0*/ ISETP.NE.AND P0, PT, R0, RZ, PT ; /* P0 = (IsSPMDMode != 0) */
/*4d00*/ IMAD.MOV.U32 R0, RZ, RZ, R30 ;
/*4d10*/ @P0 IMAD.MOV R0, RZ, RZ, c[0x0][0x0] ; /* R0 = c[0x0][0x0] when IsSPMDMode != 0, else R30 */
/*4d20*/ ISETP.GE.U32.AND P1, PT, R0.reuse, 0x20, PT ;
/*4d30*/ LOP3.LUT R2, R0, 0xffffffe0, RZ, 0xc0, !PT ; /* R0 rounded down to a multiple of 32 */
/*4d40*/ SEL R2, R2, 0x1, P1 ; /* ... or 1 when R0 < 32 */
/*4d50*/ SEL R3, R2, c[0x0][0x0], !P0 ;
/*4d60*/ ISETP.NE.AND P1, PT, R3, R0, PT ;
/*4d70*/ SEL R0, R3, RZ, P1 ; /* R0 = R3 if it differs from the previous R0, else 0. Stored to TeamState field 2 below */
/*4d80*/ @!P0 BRA `(.L_x_686) ; /* IsSPMDMode == 0: take the separate path at .L_x_686 */
/*4d90*/ BAR.SYNC 0x0 ;
/*4da0*/ ISETP.NE.AND P0, PT, R22, RZ, PT ;
/*4db0*/ BSSY B9, `(.L_x_687) ;
/*4dc0*/ IADD3 R3, R0, -0x1, RZ ;
/*4dd0*/ P2R R2, PR, RZ, 0x1 ;
/*4de0*/ @!P0 IMAD.MOV.U32 R2, RZ, RZ, 0x1 ;
/*4df0*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_2)], R0 ; /* The thread with R22 == 0 publishes field 2 = R0, field 1 = 1, field 0 = 1 */
/*4e00*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_1)], R2 ;
/*4e10*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_0)], R2 ;
/*4e20*/ BAR.SYNC 0x0 ;
/*4e30*/ BAR.SYNC 0x0 ;
/*4e40*/ ISETP.GE.U32.AND P0, PT, R3, R22, PT ; /* P0 = (R0 - 1 >= R22) */
/*4e50*/ @!P0 BRA `(.L_x_688) ;
/*4e60*/ LDS.64 R4, [`($__psiMinv_cols6_shared_$_0__2696)] ; /* R4:R5 = 64-bit psiMinv_cols value */
/*4e70*/ ISETP.NE.U32.AND P0, PT, R4, RZ, PT ;
/*4e80*/ ISETP.NE.AND.EX P0, PT, R5, RZ, PT, P0 ;
/*4e90*/ @!P0 BRA `(.L_x_688) ; /* psiMinv_cols == 0: nothing to accumulate */
/*4ea0*/ LDS R2, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*4eb0*/ IADD3 R0, P1, R4, -0x1, RZ ; /* R3:R0 = psiMinv_cols - 1 (64-bit, high word completed at address 4f00) */
/*4ec0*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_1)] ;
/*4ed0*/ ISETP.NE.AND P2, PT, R2.reuse, 0x1, PT ; /* P2 = (field 0 != 1) */
/*4ee0*/ ISETP.NE.AND P3, PT, R2, RZ, PT ; /* P3 = (field 0 != 0) */
/*4ef0*/ ISETP.NE.AND P0, PT, R3, R2, PT ;
/*4f00*/ IADD3.X R3, R5, -0x1, RZ, P1, !PT ;
/*4f10*/ SEL R6, R22, RZ, !P0 ;
/*4f20*/ P2R R50, PR, RZ, 0x8 ; /* Save P3 in R50. It is tested again at address 5d50 */
/*4f30*/ SEL R19, R6, RZ, P3 ; /* R19 = starting index: R22 when field 0 != 0 and field 1 == field 0, else 0 */
/*4f40*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1 ; /* R6 = index stride, default 1 */
/*4f50*/ @P2 BRA `(.L_x_689) ;
/*4f60*/ LDS R6, [`(_ZN4ompx5state9TeamStateE_$_2)] ; /* field 0 == 1: stride = field 2 if non-zero, otherwise derived from IsSPMDMode as above */
/*4f70*/ ISETP.NE.AND P0, PT, R6, RZ, PT ;
/*4f80*/ @!P0 LDS R7, [`(IsSPMDMode)] ;
/*4f90*/ ISETP.NE.AND P1, PT, R7, RZ, !P0 ;
/*4fa0*/ @!P0 IMAD.MOV.U32 R7, RZ, RZ, R30 ;
/*4fb0*/ @P1 IADD3 R7, RZ, c[0x0][0x0], RZ ;
/*4fc0*/ @!P0 IMAD.MOV.U32 R6, RZ, RZ, R7 ;
.L_x_689:
/*4fd0*/ ISETP.GE.U32.AND P0, PT, R0, R19, PT ; /* Region purpose: accumulate into R16:R17 a sum of products of two double streams. One stream advances 8 bytes per element (pointer loaded via psiV_temp_list_ptr), the other advances psiMinv_cols * 8 bytes per element (pointer loaded via psiMinv_temp_list_devptr, offset by WorkingIndex * 8). The index starts at R19 and steps by R6. */
/*4fe0*/ BSSY B0, `(.L_x_690) ;
/*4ff0*/ SHF.R.S32.HI R20, RZ, 0x1f, R19 ; /* R20 = sign extension of the starting index */
/*5000*/ CS2R R16, SRZ ; /* Accumulator R16:R17 = 0 */
/*5010*/ ISETP.GE.U32.AND.EX P0, PT, R3, R20, PT, P0 ; /* 64-bit test: (psiMinv_cols - 1) >= starting index */
/*5020*/ @!P0 BRA `(.L_x_691) ; /* Starting index past the last element: skip the loop with a zero accumulator */
/*5030*/ LDS R17, [`($__iw_shared5_$_0__2688)] ;
/*5040*/ LDS.64 R8, [`($__psiMinv_temp_list_devptr5_shared_$_0__2694)] ;
/*5050*/ LDS.64 R10, [`($__psiV_temp_list_ptr3_shared4_$_0__2686)] ;
/*5060*/ LDS R18, [`($__WorkingIndex7_shared7_$_0__2698)] ;
/*5070*/ IMAD.WIDE.U32 R12, R17, 0x8, R8 ; /* address of entry iw in the psiMinv pointer list */
/*5080*/ IMAD.WIDE.U32 R16, R17, 0x8, R10 ; /* address of entry iw in the psiV pointer list */
/*5090*/ LD.E.64 R14, [R12.64] ; /* R14:R15 = psiMinv base pointer for this iw */
/*50a0*/ LD.E.64 R16, [R16.64] ; /* R16:R17 = psiV base pointer for this iw */
/*50b0*/ MOV R7, R19 ;
/*50c0*/ IMAD.WIDE R18, R18, 0x8, RZ ; /* R18:R19 = WorkingIndex * 8 (signed) */
/*50d0*/ SHF.L.U64.HI R8, R4.reuse, 0x3, R5 ; /* R8:R9 = psiMinv_cols * 8, the byte stride of the strided stream */
/*50e0*/ MOV R10, R20 ;
/*50f0*/ IMAD.SHL.U32 R9, R4, 0x8, RZ ;
/*5100*/ SHF.R.S32.HI R13, RZ, 0x1f, R6 ;
/*5110*/ IMAD R4, R8, R7.reuse, RZ ;
/*5120*/ MOV R29, R7.reuse ;
/*5130*/ IMAD.WIDE.U32 R18, R9, R7, R18 ;
/*5140*/ SHF.L.U64.HI R21, R6, 0x3, R13 ;
/*5150*/ IMAD R5, R9, R10, R4 ;
/*5160*/ IADD3 R11, P1, R14, R18, RZ ; /* R14:R11 = strided-stream address for the starting index */
/*5170*/ IMAD R18, R8, R6, RZ ;
/*5180*/ IADD3.X R14, R15, R19, R5, P1, !PT ;
/*5190*/ IMAD.WIDE.U32 R4, R9, R6, RZ ;
/*51a0*/ LEA R12, P0, R7, R16, 0x3 ; /* R20:R12 = contiguous-stream address = psiV base + index * 8 */
/*51b0*/ IMAD R15, R13, R9, R18 ;
/*51c0*/ LEA.HI.X R20, R7.reuse, R17, R10.reuse, 0x3, P0 ;
/*51d0*/ IMAD.SHL.U32 R19, R6, 0x8, RZ ; /* R21:R19 = R6 * 8, per-outer-step advance of the contiguous stream */
/*51e0*/ IADD3 R58, P0, R7, 0x1, RZ ; /* R60:R58 = index + 1, the end bound used by the inner loops */
/*51f0*/ CS2R R16, SRZ ; /* Reset the accumulator (R16:R17 was reused above for addresses) */
/*5200*/ IMAD.MOV.U32 R18, RZ, RZ, R10 ;
/*5210*/ IADD3 R15, R5, R15, RZ ;
/*5220*/ IADD3.X R60, RZ, R10, RZ, P0, !PT ;
.L_x_695:
/*5230*/ ISETP.GE.U32.AND P3, PT, R29, R58, PT ; /* Outer loop head. R18:R29 is the running index, R60:R58 the current end bound */
/*5240*/ IMAD.MOV.U32 R54, RZ, RZ, R12 ;
/*5250*/ MOV R52, R11 ;
/*5260*/ IMAD.MOV.U32 R53, RZ, RZ, R20 ;
/*5270*/ ISETP.GE.U32.AND.EX P3, PT, R18, R60, PT, P3 ;
/*5280*/ IMAD.MOV.U32 R49, RZ, RZ, R14 ;
/*5290*/ IMAD.MOV.U32 R51, RZ, RZ, R29 ;
/*52a0*/ IMAD.MOV.U32 R55, RZ, RZ, R18 ;
/*52b0*/ @P3 IMAD.MOV.U32 R34, RZ, RZ, R54 ;
/*52c0*/ @P3 IADD3 R51, P0, R51, 0x1, RZ ;
/*52d0*/ @P3 IMAD.MOV.U32 R35, RZ, RZ, R53 ;
/*52e0*/ @P3 IMAD.MOV.U32 R32, RZ, RZ, R52 ;
/*52f0*/ @P3 IADD3.X R55, RZ, R55, RZ, P0, !PT ;
/*5300*/ @P3 IMAD.MOV.U32 R33, RZ, RZ, R49 ;
/*5310*/ @P3 IADD3 R54, P0, R54, 0x8, RZ ;
/*5320*/ @P3 LD.E.64 R34, [R34.64] ;
/*5330*/ @P3 LD.E.64 R32, [R32.64] ;
/*5340*/ @P3 IMAD.X R53, RZ, RZ, R53, P0 ;
/*5350*/ @P3 IADD3 R52, P0, R52, R9, RZ ;
/*5360*/ @P3 IADD3.X R49, R49, R8, RZ, P0, !PT ;
/*5370*/ IADD3 R36, P0, R58, -R51, RZ ; /* Remaining element count = end bound - index */
/*5380*/ ISETP.LE.U32.AND P1, PT, R36, 0x3, PT ;
/*5390*/ IMAD.X R37, R60, 0x1, ~R55, P0 ;
/*53a0*/ ISETP.GT.U32.AND P0, PT, R58, R51, PT ;
/*53b0*/ ISETP.GT.U32.AND.EX P0, PT, R60, R55, PT, P0 ;
/*53c0*/ ISETP.LE.U32.OR.EX P0, PT, R37, RZ, !P0, P1 ;
/*53d0*/ BSSY B1, `(.L_x_692) ;
/*53e0*/ @P3 DFMA R16, R32, R34, R16 ;
/*53f0*/ PLOP3.LUT P3, PT, P3, PT, PT, 0x8, 0x0 ;
/*5400*/ @P0 BRA `(.L_x_693) ; /* Presumably skips the 4-way unrolled loop when 3 or fewer elements remain - TODO confirm */
/*5410*/ IADD3 R62, P0, R58, -0x3, RZ ; /* R64:R62 = end bound - 3, the exit bound of the unrolled loop */
/*5420*/ PLOP3.LUT P3, PT, PT, PT, PT, 0x8, 0x0 ;
/*5430*/ IADD3.X R64, R60, -0x1, RZ, P0, !PT ;
.L_x_694:
/*5440*/ IMAD.MOV.U32 R32, RZ, RZ, R54 ; /* Inner loop unrolled by 4: four contiguous loads at +0x0, +0x8, +0x10, +0x18 and four strided loads, chained through four DFMAs */
/*5450*/ IADD3 R48, P0, R52, R9.reuse, RZ ;
/*5460*/ IMAD.MOV.U32 R33, RZ, RZ, R53 ;
/*5470*/ IMAD.MOV.U32 R42, RZ, RZ, R52 ;
/*5480*/ IMAD.MOV.U32 R43, RZ, RZ, R49 ;
/*5490*/ LD.E.64 R52, [R32.64] ;
/*54a0*/ LD.E.64 R46, [R42.64] ;
/*54b0*/ IMAD.X R49, R49, 0x1, R8, P0 ;
/*54c0*/ IADD3 R56, P0, R48, R9, RZ ;
/*54d0*/ LD.E.64 R36, [R32.64+0x8] ;
/*54e0*/ IADD3.X R57, R49, R8, RZ, P0, !PT ;
/*54f0*/ LD.E.64 R48, [R48.64] ;
/*5500*/ IADD3 R34, P0, R56, R9, RZ ;
/*5510*/ LD.E.64 R38, [R32.64+0x10] ;
/*5520*/ IMAD.X R35, R57, 0x1, R8, P0 ;
/*5530*/ LD.E.64 R40, [R56.64] ;
/*5540*/ LD.E.64 R42, [R32.64+0x18] ;
/*5550*/ LD.E.64 R44, [R34.64] ;
/*5560*/ IADD3 R54, P0, R32, 0x20, RZ ; /* contiguous stream += 32 bytes */
/*5570*/ DFMA R46, R46, R52, R16 ;
/*5580*/ IMAD.X R53, RZ, RZ, R33, P0 ;
/*5590*/ IADD3 R51, P0, R51, 0x4, RZ ; /* index += 4 */
/*55a0*/ IADD3.X R55, RZ, R55, RZ, P0, !PT ;
/*55b0*/ IADD3 R52, P0, R34, R9, RZ ;
/*55c0*/ DFMA R36, R48, R36, R46 ;
/*55d0*/ IMAD.X R49, R35, 0x1, R8, P0 ;
/*55e0*/ ISETP.GE.U32.AND P0, PT, R51, R62, PT ;
/*55f0*/ ISETP.GE.U32.AND.EX P0, PT, R55, R64, PT, P0 ;
/*5600*/ DFMA R36, R40, R38, R36 ;
/*5610*/ DFMA R16, R44, R42, R36 ;
/*5620*/ @!P0 BRA `(.L_x_694) ;
.L_x_693:
/*5630*/ BSYNC B1 ;
.L_x_692:
/*5640*/ IADD3 R32, P0, R58, -R51, RZ ; /* Remainder handling: a predicated 2-element step (!P1) followed by a predicated 1-element step (P0) */
/*5650*/ ISETP.LE.U32.AND P1, PT, R32, 0x1, PT ;
/*5660*/ IMAD.X R33, R60, 0x1, ~R55, P0 ;
/*5670*/ ISETP.GT.U32.AND P0, PT, R58, R51, PT ;
/*5680*/ ISETP.GT.U32.AND.EX P0, PT, R60, R55, PT, P0 ;
/*5690*/ ISETP.LE.U32.OR.EX P1, PT, R33, RZ, !P0, P1 ;
/*56a0*/ @!P1 IADD3 R51, P0, R51, 0x2, RZ ;
/*56b0*/ @!P1 IMAD.MOV.U32 R46, RZ, RZ, R54 ;
/*56c0*/ @!P1 PLOP3.LUT P3, PT, PT, PT, PT, 0x8, 0x0 ;
/*56d0*/ @!P1 IMAD.MOV.U32 R47, RZ, RZ, R53 ;
/*56e0*/ @!P1 IMAD.X R55, RZ, RZ, R55, P0 ;
/*56f0*/ @!P1 IADD3 R40, P0, R52, R9, RZ ;
/*5700*/ @!P1 IMAD.MOV.U32 R44, RZ, RZ, R52 ;
/*5710*/ @!P1 LD.E.64 R38, [R46.64] ;
/*5720*/ @!P1 IMAD.MOV.U32 R45, RZ, RZ, R49 ;
/*5730*/ @!P1 IMAD.X R41, R49, 0x1, R8, P0 ;
/*5740*/ ISETP.LT.U32.AND P0, PT, R51, R58, PT ;
/*5750*/ @!P1 LD.E.64 R42, [R46.64+0x8] ;
/*5760*/ ISETP.LT.U32.OR.EX P0, PT, R55, R60, P3, P0 ;
/*5770*/ @!P1 LD.E.64 R36, [R44.64] ;
/*5780*/ @!P1 IADD3 R54, P3, R54, 0x10, RZ ;
/*5790*/ @!P1 IMAD.X R53, RZ, RZ, R53, P3 ;
/*57a0*/ @!P1 IADD3 R52, P3, R40, R9, RZ ;
/*57b0*/ @!P1 IMAD.X R49, R41, 0x1, R8, P3 ;
/*57c0*/ @!P1 LD.E.64 R40, [R40.64] ;
/*57d0*/ IMAD.MOV.U32 R32, RZ, RZ, R52 ;
/*57e0*/ IMAD.MOV.U32 R33, RZ, RZ, R49 ;
/*57f0*/ IMAD.MOV.U32 R55, RZ, RZ, R53 ;
/*5800*/ @P0 LD.E.64 R32, [R32.64] ;
/*5810*/ @P0 LD.E.64 R34, [R54.64] ;
/*5820*/ @!P1 DFMA R36, R36, R38, R16 ;
/*5830*/ @!P1 DFMA R16, R40, R42, R36 ;
/*5840*/ IADD3 R7, P1, R6, R7, RZ ; /* Advance the outer position by the stride R6 (64-bit, sign-extended in R13) */
/*5850*/ IMAD.X R10, R13, 0x1, R10, P1 ;
/*5860*/ ISETP.LT.U32.AND P1, PT, R7, R0, PT ;
/*5870*/ ISETP.LT.U32.AND.EX P1, PT, R10, R3, PT, P1 ;
/*5880*/ @P0 DFMA R16, R32, R34, R16 ;
/*5890*/ IADD3 R29, P0, R6, R29, RZ ;
/*58a0*/ SEL R7, R7, R0, P1 ; /* Clamp the position to psiMinv_cols - 1 (R3:R0) */
/*58b0*/ SEL R10, R10, R3, P1 ;
/*58c0*/ IMAD.X R18, R13, 0x1, R18, P0 ;
/*58d0*/ IADD3 R12, P0, R12, R19, RZ ;
/*58e0*/ IADD3 R58, P1, R7, 0x1, RZ ; /* New end bound = clamped position + 1 */
/*58f0*/ IMAD.X R20, R20, 0x1, R21, P0 ;
/*5900*/ ISETP.GE.U32.AND P0, PT, R29, R58, PT ;
/*5910*/ IMAD.X R60, RZ, RZ, R10, P1 ;
/*5920*/ ISETP.GE.U32.AND.EX P0, PT, R18, R60, PT, P0 ;
/*5930*/ IADD3 R11, P1, R11, R4, RZ ;
/*5940*/ IMAD.X R14, R14, 0x1, R15, P1 ;
/*5950*/ @!P0 BRA `(.L_x_695) ; /* Continue while the running index is below the new end bound */
.L_x_691:
/*5960*/ BSYNC B0 ;
.L_x_690:
/*5970*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ; /* Region purpose: determine the participating thread count R35, then reduce the single 64-bit accumulator R16:R17 across lanes with SHFL.DOWN and DADD. This PLOP3 presumably sets P0 to false (constant inputs) - confirm against the LUT encoding. P0 gates the shared-memory accumulate at address 6660. */
/*5980*/ MOV R35, 0x1 ; /* Default thread count 1 */
/*5990*/ @P2 BRA `(.L_x_696) ; /* P2 = (TeamState field 0 != 1), computed at address 4ed0 */
/*59a0*/ LDS R35, [`(_ZN4ompx5state9TeamStateE_$_2)] ; /* Otherwise the count is field 2 if non-zero, else c[0x0][0x0] (IsSPMDMode != 0) or c[0x0][0x0] - 32 */
/*59b0*/ ISETP.NE.AND P1, PT, R35, RZ, PT ;
/*59c0*/ @!P1 LDS R0, [`(IsSPMDMode)] ;
/*59d0*/ ISETP.NE.AND P2, PT, R0, RZ, !P1 ;
/*59e0*/ @!P1 IMAD.MOV.U32 R0, RZ, RZ, c[0x0][0x0] ;
/*59f0*/ @!P1 IADD3 R30, R0, -0x20, RZ ;
/*5a00*/ @!P1 IMAD.MOV.U32 R0, RZ, RZ, R30 ;
/*5a10*/ @P2 IADD3 R0, RZ, c[0x0][0x0], RZ ;
/*5a20*/ @!P1 IMAD.MOV.U32 R35, RZ, RZ, R0 ;
.L_x_696:
/*5a30*/ ISETP.NE.AND P1, PT, R35, 0x1, PT ;
/*5a40*/ BSSY B8, `(.L_x_697) ;
/*5a50*/ @!P1 BRA `(.L_x_698) ; /* Count == 1: no cross-lane reduction needed */
/*5a60*/ IADD3 R32, R35.reuse, 0x1f, RZ ; /* R32 = count + 31 */
/*5a70*/ BSSY B6, `(.L_x_699) ;
/*5a80*/ LOP3.LUT P0, R18, R35, 0x1f, RZ, 0xc0, !PT ; /* R18 = count & 31. P0 presumably flags a non-zero result */
/*5a90*/ SHF.R.U32.HI R33, RZ, 0x5, R32 ; /* R33 = count / 32 rounded up */
/*5aa0*/ IADD3 R3, R33, -0x1, RZ ;
/*5ab0*/ ISETP.GE.U32.AND P1, PT, R26, R3, PT ; /* R26 comes from outside this view - presumably a warp index, TODO confirm */
/*5ac0*/ @P0 BRA P1, `(.L_x_700) ; /* Presumably the last, partially filled group of 32 lanes: use the variable-offset loop */
/*5ad0*/ BRA.DIV ~URZ, `(.L_x_701) ; /* Divergent-warp fallback. The target is outside this view */
/*5ae0*/ SHFL.DOWN PT, R5, R17, 0x10, 0x1f ; /* Fixed-offset tree: offsets 16, 8, 4, 2, 1, each a high/low shuffle pair followed by a DADD */
/*5af0*/ SHFL.DOWN PT, R4, R16, 0x10, 0x1f ;
/*5b00*/ DADD R6, R16, R4 ;
/*5b10*/ SHFL.DOWN PT, R5, R7, 0x8, 0x1f ;
/*5b20*/ SHFL.DOWN PT, R4, R6, 0x8, 0x1f ;
/*5b30*/ DADD R8, R6, R4 ;
/*5b40*/ SHFL.DOWN PT, R5, R9, 0x4, 0x1f ;
/*5b50*/ SHFL.DOWN PT, R4, R8, 0x4, 0x1f ;
/*5b60*/ DADD R10, R8, R4 ;
/*5b70*/ SHFL.DOWN PT, R5, R11, 0x2, 0x1f ;
/*5b80*/ SHFL.DOWN PT, R4, R10, 0x2, 0x1f ;
/*5b90*/ DADD R18, R10, R4 ;
/*5ba0*/ SHFL.DOWN PT, R17, R19, 0x1, 0x1f ;
/*5bb0*/ SHFL.DOWN PT, R4, R18, 0x1, 0x1f ;
.L_x_770:
/*5bc0*/ MOV R5, R17 ; /* R17 received the high word of the offset-1 shuffle. Pair it with the low word in R4 */
/*5bd0*/ DADD R16, R4, R18 ; /* Reduced value in R16:R17 */
/*5be0*/ BRA `(.L_x_702) ;
.L_x_700:
/*5bf0*/ ISETP.GE.U32.AND P0, PT, R18, 0x2, PT ;
/*5c00*/ @!P0 BRA `(.L_x_702) ; /* Fewer than 2 lanes: nothing to combine */
/*5c10*/ SHF.R.U32.HI R19, RZ, 0x1, R18 ; /* Initial offset R19 = R18 >> 1 */
.L_x_704:
/*5c20*/ LOP3.LUT R0, R19, 0xffff, RZ, 0xc0, !PT ;
/*5c30*/ ISETP.GE.U32.AND P0, PT, R25, R0, PT ; /* P0 = (R25 >= offset). R25 comes from outside this view - presumably the lane id, TODO confirm */
/*5c40*/ P2R R34, PR, RZ, 0x1 ; /* Save P0 in R34 across the shuffle */
/*5c50*/ BRA.DIV ~URZ, `(.L_x_703) ;
/*5c60*/ SHFL.DOWN PT, R29, R17, R19, 0x1f ;
/*5c70*/ SHFL.DOWN PT, R4, R16, R19, 0x1f ;
.L_x_771:
/*5c80*/ ISETP.NE.AND P0, PT, R34, RZ, PT ; /* Restore the saved predicate */
/*5c90*/ IMAD.MOV.U32 R5, RZ, RZ, R29 ;
/*5ca0*/ IADD3 R19, R18, 0x1, RZ ;
/*5cb0*/ FSEL R16, R16, RZ, !P0 ; /* Lanes at or above the offset replace their own value by -0.0 (low word RZ, high word -RZ), so the DADD below leaves the shuffled-in value */
/*5cc0*/ FSEL R17, R17, -RZ, !P0 ;
/*5cd0*/ ISETP.GT.U32.AND P0, PT, R18, 0x2, PT ; /* Loop again while the pre-update R18 was > 2 */
/*5ce0*/ SHF.R.U32.HI R18, RZ, 0x1, R19.reuse ; /* next R18 = (R18 + 1) >> 1 */
/*5cf0*/ DADD R16, R4, R16 ;
/*5d00*/ SHF.R.U32.HI R19, RZ, 0x2, R19 ; /* next offset = (R18 + 1) >> 2 */
/*5d10*/ @P0 BRA `(.L_x_704) ;
.L_x_702:
/*5d20*/ BSYNC B6 ;
.L_x_699:
/*5d30*/ ISETP.GE.U32.AND P1, PT, R35, 0x21, PT ; /* Region purpose: when the thread count R35 >= 33, exchange the 64-bit value R16:R17 through __openmp_nvptx_data_transfer_temporary_storage (low word first, then high word, with barriers around each publish) and finish with a variable-offset shuffle reduction among threads with R22 <= 31. */
/*5d40*/ BSSY B7, `(.L_x_705) ;
/*5d50*/ ISETP.NE.AND P0, PT, R50, RZ, PT ; /* R50 = saved (TeamState field 0 != 0) bit from address 4f20 */
/*5d60*/ ISETP.EQ.AND P0, PT, R22, UR38, !P0 ; /* P0 = (R22 == UR38) and field 0 == 0. UR38 comes from outside this view */
/*5d70*/ SEL R18, R22, RZ, !P0 ; /* R18 = 0 when P0 holds, else R22. Tested at address 6640 to pick the accumulating thread */
/*5d80*/ @!P1 BRA `(.L_x_706) ; /* R35 <= 32: skip the exchange */
/*5d90*/ LDS R0, [`(IsSPMDMode)] ;
/*5da0*/ ISETP.NE.AND P2, PT, R22, UR38, PT ;
/*5db0*/ BSSY B6, `(.L_x_707) ;
/*5dc0*/ ISETP.NE.AND P1, PT, R0, RZ, PT ;
/*5dd0*/ P2R R0, PR, RZ, 0x4 ;
/*5de0*/ @P1 BRA `(.L_x_708) ; /* IsSPMDMode != 0: WARPSYNC + BAR.SYNC 0x0 */
/*5df0*/ @!P0 BRA `(.L_x_709) ;
/*5e00*/ @!PT LDS RZ, [RZ] ; /* Predicated on !PT, so these four loads never execute */
/*5e10*/ @!PT LDS RZ, [RZ] ;
/*5e20*/ @!PT LDS RZ, [RZ] ;
/*5e30*/ @!PT LDS RZ, [RZ] ;
/*5e40*/ MEMBAR.SC.GPU ; /* Fence-only branch. No BAR.SYNC is executed on it */
/*5e50*/ ERRBAR;
/*5e60*/ CCTL.IVALL ;
/*5e70*/ BRA `(.L_x_710) ;
.L_x_709:
/*5e80*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*5e90*/ ISETP.NE.AND P0, PT, R2, 0x1, PT ; /* R2 presumably still holds the TeamState field 0 value loaded at address 4ea0 - TODO confirm */
/*5ea0*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*5eb0*/ SEL R5, R30, R3, !P1 ; /* R5 = field 2 if non-zero, else R30 */
/*5ec0*/ SEL R5, R5, 0x1, !P0 ;
/*5ed0*/ BRA.DIV ~URZ, `(.L_x_711) ;
/*5ee0*/ BAR.SYNC 0x7, R5 ; /* Barrier 7 with R5 as the thread-count operand */
/*5ef0*/ BRA `(.L_x_710) ;
.L_x_708:
/*5f00*/ WARPSYNC 0xffffffff ;
/*5f10*/ BAR.SYNC 0x0 ;
.L_x_710:
/*5f20*/ BSYNC B6 ;
.L_x_707:
/*5f30*/ ISETP.NE.AND P0, PT, R31, RZ, PT ; /* R31 comes from outside this view - presumably the lane id, TODO confirm */
/*5f40*/ BSSY B6, `(.L_x_712) ;
/*5f50*/ @!P0 STS [R28+`(__openmp_nvptx_data_transfer_temporary_storage)], R16 ; /* Threads with R31 == 0 publish the low word */
/*5f60*/ LDS R0, [`(IsSPMDMode)] ;
/*5f70*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*5f80*/ @P0 BRA `(.L_x_713) ;
/*5f90*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*5fa0*/ ISETP.NE.AND P1, PT, R22, UR38, PT ;
/*5fb0*/ ISETP.NE.OR P0, PT, R0, RZ, P1 ;
/*5fc0*/ @P0 BRA `(.L_x_714) ;
/*5fd0*/ @!PT LDS RZ, [RZ] ;
/*5fe0*/ @!PT LDS RZ, [RZ] ;
/*5ff0*/ @!PT LDS RZ, [RZ] ;
/*6000*/ @!PT LDS RZ, [RZ] ;
/*6010*/ MEMBAR.SC.GPU ;
/*6020*/ ERRBAR;
/*6030*/ CCTL.IVALL ;
/*6040*/ BRA `(.L_x_715) ;
.L_x_714:
/*6050*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*6060*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*6070*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*6080*/ SEL R5, R30, R3, !P1 ;
/*6090*/ SEL R5, R5, 0x1, !P0 ;
/*60a0*/ BRA.DIV ~URZ, `(.L_x_716) ;
/*60b0*/ BAR.SYNC 0x7, R5 ;
/*60c0*/ BRA `(.L_x_715) ;
.L_x_713:
/*60d0*/ WARPSYNC 0xffffffff ;
/*60e0*/ BAR.SYNC 0x0 ;
.L_x_715:
/*60f0*/ BSYNC B6 ;
.L_x_712:
/*6100*/ LDS R0, [`(IsSPMDMode)] ;
/*6110*/ ISETP.GE.U32.AND P1, PT, R22, R33, PT ; /* Threads with R22 < R33 (count / 32 rounded up) receive a value */
/*6120*/ P2R R2, PR, RZ, 0x2 ; /* Save P1 in R2 for the high-word read at address 64d0 */
/*6130*/ @!P1 LDS R3, [R27+`(__openmp_nvptx_data_transfer_temporary_storage)] ;
/*6140*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*6150*/ @P0 WARPSYNC 0xffffffff ;
/*6160*/ @!P1 IMAD.MOV.U32 R16, RZ, RZ, R3 ; /* Received low word replaces R16 */
/*6170*/ @P0 BAR.SYNC 0x0 ;
/*6180*/ @P0 BRA `(.L_x_717) ;
/*6190*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*61a0*/ ISETP.NE.AND P1, PT, R22, UR38, PT ;
/*61b0*/ BSSY B6, `(.L_x_717) ;
/*61c0*/ ISETP.EQ.AND P0, PT, R0, RZ, PT ;
/*61d0*/ @!P1 BRA P0, `(.L_x_718) ;
/*61e0*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*61f0*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*6200*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*6210*/ SEL R5, R30, R3, !P1 ;
/*6220*/ SEL R5, R5, 0x1, !P0 ;
/*6230*/ BRA.DIV ~URZ, `(.L_x_719) ;
/*6240*/ BAR.SYNC 0x7, R5 ;
/*6250*/ BRA `(.L_x_720) ;
.L_x_718:
/*6260*/ @!PT LDS RZ, [RZ] ;
/*6270*/ @!PT LDS RZ, [RZ] ;
/*6280*/ @!PT LDS RZ, [RZ] ;
/*6290*/ @!PT LDS RZ, [RZ] ;
/*62a0*/ MEMBAR.SC.GPU ;
/*62b0*/ ERRBAR;
/*62c0*/ CCTL.IVALL ;
.L_x_720:
/*62d0*/ BSYNC B6 ;
.L_x_717:
/*62e0*/ ISETP.NE.AND P0, PT, R31, RZ, PT ;
/*62f0*/ @!P0 STS [R28+`(__openmp_nvptx_data_transfer_temporary_storage)], R17 ; /* Threads with R31 == 0 publish the high word */
/*6300*/ LDS R0, [`(IsSPMDMode)] ;
/*6310*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*6320*/ @P0 WARPSYNC 0xffffffff ;
/*6330*/ @P0 BAR.SYNC 0x0 ;
/*6340*/ @P0 BRA `(.L_x_721) ;
/*6350*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*6360*/ ISETP.NE.AND P1, PT, R22, UR38, PT ;
/*6370*/ BSSY B6, `(.L_x_721) ;
/*6380*/ ISETP.EQ.AND P0, PT, R0, RZ, PT ;
/*6390*/ @!P1 BRA P0, `(.L_x_722) ;
/*63a0*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*63b0*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*63c0*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*63d0*/ SEL R5, R30, R3, !P1 ;
/*63e0*/ SEL R5, R5, 0x1, !P0 ;
/*63f0*/ BRA.DIV ~URZ, `(.L_x_723) ;
/*6400*/ BAR.SYNC 0x7, R5 ;
/*6410*/ BRA `(.L_x_724) ;
.L_x_722:
/*6420*/ @!PT LDS RZ, [RZ] ;
/*6430*/ @!PT LDS RZ, [RZ] ;
/*6440*/ @!PT LDS RZ, [RZ] ;
/*6450*/ @!PT LDS RZ, [RZ] ;
/*6460*/ MEMBAR.SC.GPU ;
/*6470*/ ERRBAR;
/*6480*/ CCTL.IVALL ;
.L_x_724:
/*6490*/ BSYNC B6 ;
.L_x_721:
/*64a0*/ ISETP.NE.AND P1, PT, R2, RZ, PT ; /* Restore the saved (R22 >= R33) predicate */
/*64b0*/ ISETP.GE.U32.AND P0, PT, R32, 0x40, PT ;
/*64c0*/ ISETP.GT.U32.OR P0, PT, R22, 0x1f, !P0 ; /* P0 = (R22 > 31) or (R32 < 64) */
/*64d0*/ @!P1 LDS R3, [R27+`(__openmp_nvptx_data_transfer_temporary_storage)] ;
/*64e0*/ @!P1 IMAD.MOV.U32 R17, RZ, RZ, R3 ; /* Received high word replaces R17 */
/*64f0*/ @P0 BRA `(.L_x_706) ;
/*6500*/ SHF.R.U32.HI R32, RZ, 0x6, R32 ; /* Initial offset = (count + 31) >> 6 */
.L_x_726:
/*6510*/ LOP3.LUT R0, R32, 0xffff, RZ, 0xc0, !PT ;
/*6520*/ LOP3.LUT R3, R18, 0xffff, RZ, 0xc0, !PT ;
/*6530*/ PRMT R29, R32, 0x9910, RZ ; /* Appears to sign-extend the low 16 bits of the offset - confirm against the PRMT selector encoding */
/*6540*/ ISETP.GE.U32.AND P0, PT, R3, R0, PT ; /* P0 = ((R18 & 0xffff) >= offset) */
/*6550*/ P2R R2, PR, RZ, 0x1 ; /* Save P0 across the shuffle */
/*6560*/ BRA.DIV ~URZ, `(.L_x_725) ; /* Divergent-warp fallback. The target is outside this view */
/*6570*/ SHFL.DOWN PT, R19, R17, R29, 0x1f ;
/*6580*/ SHFL.DOWN PT, R4, R16, R29, 0x1f ;
.L_x_772:
/*6590*/ ISETP.NE.AND P0, PT, R2, RZ, PT ;
/*65a0*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
/*65b0*/ IADD3 R32, R33, 0x1, RZ ;
/*65c0*/ FSEL R16, R16, RZ, !P0 ; /* Threads at or above the offset contribute -0.0 instead of their own value */
/*65d0*/ FSEL R17, R17, -RZ, !P0 ;
/*65e0*/ ISETP.GT.U32.AND P0, PT, R33, 0x2, PT ; /* Loop again while the pre-update R33 was > 2 */
/*65f0*/ SHF.R.U32.HI R33, RZ, 0x1, R32.reuse ;
/*6600*/ DADD R16, R4, R16 ;
/*6610*/ SHF.R.U32.HI R32, RZ, 0x2, R32 ;
/*6620*/ @P0 BRA `(.L_x_726) ;
.L_x_706:
/*6630*/ BSYNC B7 ;
.L_x_705:
/*6640*/ ISETP.NE.AND P0, PT, R18, RZ, PT ; /* Region purpose: the selected thread (R18 == 0, or P0 as set before the single-thread branch to .L_x_698) adds the reduced value R16:R17 into the shared c_ratio accumulator. Afterwards all threads pass barriers while the thread with R22 == 0 resets the TeamState fields. */
.L_x_698:
/*6650*/ BSYNC B8 ;
.L_x_697:
/*6660*/ @!P0 LDS.64 R2, [`($__c_ratio_shared8_$_0__2702)] ;
/*6670*/ @!P0 DADD R2, R2, R16 ; /* c_ratio += R16:R17 */
/*6680*/ @!P0 STS.64 [`($__c_ratio_shared8_$_0__2702)], R2 ;
.L_x_688:
/*6690*/ BSYNC B9 ;
.L_x_687:
/*66a0*/ ISETP.NE.AND P0, PT, R22, RZ, PT ;
/*66b0*/ WARPSYNC 0xffffffff ;
/*66c0*/ BAR.SYNC 0x0 ; /* Block-wide barrier after the c_ratio update, before the TeamState reset */
/*66d0*/ @!P0 MOV R0, 0x1 ;
/*66e0*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ; /* Reset by the thread with R22 == 0: field 0 = 0, field 1 = 0, field 2 = 1 */
/*66f0*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_1)], RZ ;
/*6700*/ @!P0 STS [`(_ZN4ompx5state9TeamStateE_$_2)], R0 ;
/*6710*/ BAR.SYNC 0x0 ;
/*6720*/ BAR.SYNC 0x0 ;
/*6730*/ BRA `(.L_x_727) ; /* The target is outside this view */
.L_x_686:
/*6740*/ ISETP.GT.U32.AND P0, PT, R2, 0x1, PT ;
/*6750*/ @P0 BRA `(.L_x_728) ;
/*6760*/ IMAD.MOV.U32 R0, RZ, RZ, 0x1 ;
/*6770*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], R0 ;
/*6780*/ LDS.64 R8, [`($__psiMinv_cols6_shared_$_0__2696)] ;
/*6790*/ ISETP.NE.U32.AND P0, PT, R8, RZ, PT ;
/*67a0*/ ISETP.NE.AND.EX P0, PT, R9, RZ, PT, P0 ;
/*67b0*/ @!P0 BRA `(.L_x_729) ;
/*67c0*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_1)] ;
/*67d0*/ BSSY B0, `(.L_x_730) ;
/*67e0*/ CS2R R16, SRZ ;
/*67f0*/ LDS R5, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*6800*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*6810*/ IADD3 R0, P1, R8, -0x1, RZ ;
/*6820*/ SEL R11, R22, RZ, !P0 ;
/*6830*/ IADD3.X R3, R9, -0x1, RZ, P1, !PT ;
/*6840*/ ISETP.GE.U32.AND P0, PT, R0, R11, PT ;
/*6850*/ SHF.R.S32.HI R14, RZ, 0x1f, R11 ;
/*6860*/ ISETP.NE.AND P1, PT, R5, RZ, PT ;
/*6870*/ ISETP.GE.U32.AND.EX P0, PT, R3, R14, PT, P0 ;
/*6880*/ SEL R2, R30, R5, !P1 ;
/*6890*/ @!P0 BRA `(.L_x_731) ;
/*68a0*/ LDS R13, [`($__iw_shared5_$_0__2688)] ;
/*68b0*/ LDS.64 R4, [`($__psiMinv_temp_list_devptr5_shared_$_0__2694)] ;
/*68c0*/ LDS.64 R6, [`($__psiV_temp_list_ptr3_shared4_$_0__2686)] ;
/*68d0*/ LDS R10, [`($__WorkingIndex7_shared7_$_0__2698)] ;
/*68e0*/ IMAD.WIDE.U32 R4, R13, 0x8, R4 ;
/*68f0*/ IMAD.WIDE.U32 R12, R13, 0x8, R6 ;
/*6900*/ LD.E.64 R4, [R4.64] ;
/*6910*/ LD.E.64 R16, [R12.64] ;
/*6920*/ MOV R7, R11 ;
/*6930*/ IMAD.WIDE R10, R10, 0x8, RZ ;
/*6940*/ SHF.L.U64.HI R6, R8.reuse, 0x3, R9 ;
/*6950*/ SHF.L.U32 R8, R8, 0x3, RZ ;
/*6960*/ IMAD.MOV.U32 R9, RZ, RZ, R14 ;
/*6970*/ MOV R33, R7.reuse ;
/*6980*/ IMAD R14, R6, R7, RZ ;
/*6990*/ IMAD.WIDE.U32 R18, R8, R7, R10 ;
/*69a0*/ SHF.R.S32.HI R13, RZ, 0x1f, R2 ;
/*69b0*/ IMAD R21, R8, R9, R14 ;
/*69c0*/ SHF.L.U64.HI R14, R2, 0x3, R13 ;
/*69d0*/ IMAD R12, R6, R2, RZ ;
/*69e0*/ IADD3 R11, P1, R4, R18, RZ ;
/*69f0*/ LEA R15, P0, R7, R16, 0x3 ;
/*6a00*/ IADD3.X R10, R5, R19, R21, P1, !PT ;
/*6a10*/ IMAD.WIDE.U32 R4, R8, R2, RZ ;
/*6a20*/ LEA.HI.X R21, R7.reuse, R17, R9.reuse, 0x3, P0 ;
/*6a30*/ IADD3 R57, P0, R7, 0x1, RZ ;
/*6a40*/ IMAD R19, R13, R8, R12 ;
/*6a50*/ CS2R R16, SRZ ;
/*6a60*/ IMAD.MOV.U32 R12, RZ, RZ, R9 ;
/*6a70*/ IADD3.X R39, RZ, R9, RZ, P0, !PT ;
/*6a80*/ IMAD.IADD R19, R5, 0x1, R19 ;
.L_x_735:
/*6a90*/ ISETP.GE.U32.AND P0, PT, R33, R57, PT ;
/*6aa0*/ IMAD.MOV.U32 R53, RZ, RZ, R11 ;
/*6ab0*/ IMAD.MOV.U32 R54, RZ, RZ, R10 ;
/*6ac0*/ ISETP.GE.U32.AND.EX P0, PT, R12, R39, PT, P0 ;
/*6ad0*/ IMAD.MOV.U32 R50, RZ, RZ, R15 ;
/*6ae0*/ IMAD.MOV.U32 R51, RZ, RZ, R21 ;
/*6af0*/ @P0 MOV R34, R53 ;
/*6b00*/ @P0 IMAD.MOV.U32 R35, RZ, RZ, R54 ;
/*6b10*/ @P0 IMAD.MOV.U32 R36, RZ, RZ, R50 ;
/*6b20*/ @P0 IMAD.MOV.U32 R37, RZ, RZ, R51 ;
/*6b30*/ @P0 LD.E.64 R34, [R34.64] ;
/*6b40*/ @P0 LD.E.64 R36, [R36.64] ;
/*6b50*/ MOV R20, R33 ;
/*6b60*/ IMAD.MOV.U32 R32, RZ, RZ, R12 ;
/*6b70*/ BSSY B1, `(.L_x_732) ;
/*6b80*/ @P0 IADD3 R20, P1, R20, 0x1, RZ ;
/*6b90*/ IADD3 R18, P3, R57.reuse, -R20.reuse, RZ ;
/*6ba0*/ @P0 IMAD.X R32, RZ, RZ, R32, P1 ;
/*6bb0*/ ISETP.GT.U32.AND P2, PT, R57, R20, PT ;
/*6bc0*/ ISETP.LE.U32.AND P1, PT, R18, 0x3, PT ;
/*6bd0*/ IMAD.X R18, R39.reuse, 0x1, ~R32, P3 ;
/*6be0*/ ISETP.GT.U32.AND.EX P2, PT, R39, R32, PT, P2 ;
/*6bf0*/ @P0 IADD3 R50, P3, R50, 0x8, RZ ;
/*6c00*/ ISETP.LE.U32.OR.EX P2, PT, R18, RZ, !P2, P1 ;
/*6c10*/ @P0 IADD3.X R51, RZ, R51, RZ, P3, !PT ;
/*6c20*/ @P0 IADD3 R53, P3, R53, R8, RZ ;
/*6c30*/ PLOP3.LUT P1, PT, P0, PT, PT, 0x8, 0x0 ;
/*6c40*/ @P0 IMAD.X R54, R54, 0x1, R6, P3 ;
/*6c50*/ @P0 DFMA R16, R34, R36, R16 ;
/*6c60*/ @P2 BRA `(.L_x_733) ;
/*6c70*/ IADD3 R59, P0, R57, -0x3, RZ ;
/*6c80*/ PLOP3.LUT P1, PT, PT, PT, PT, 0x8, 0x0 ;
/*6c90*/ IADD3.X R61, R39, -0x1, RZ, P0, !PT ;
.L_x_734:
/*6ca0*/ IADD3 R52, P0, R53, R8.reuse, RZ ; /* 4x-unrolled double-precision multiply-accumulate loop into R16:R17. Here: low word of strided address #1 = R53 + R8, carry out in P0 */
/*6cb0*/ IMAD.MOV.U32 R34, RZ, RZ, R50 ; /* R34:R35 = contiguous-operand address, copied from R50:R51 because the load at 6cf0 overwrites R50:R51 */
/*6cc0*/ IMAD.MOV.U32 R35, RZ, RZ, R51 ;
/*6cd0*/ IMAD.MOV.U32 R40, RZ, RZ, R53 ; /* R40:R41 = strided address #0 (low word R53, high word R54) */
/*6ce0*/ IMAD.MOV.U32 R41, RZ, RZ, R54 ;
/*6cf0*/ LD.E.64 R50, [R34.64] ; /* c0 = 64-bit load at contiguous address + 0x0 */
/*6d00*/ IMAD.X R53, R54, 0x1, R6, P0 ; /* high word of strided address #1 = R54 + R6 + carry; the stride is the 64-bit pair R8 (low) / R6 (high) */
/*6d10*/ IADD3 R54, P0, R52, R8, RZ ; /* low word of strided address #2 */
/*6d20*/ LD.E.64 R44, [R40.64] ; /* s0 = 64-bit load at strided address #0 */
/*6d30*/ IADD3.X R55, R53, R6, RZ, P0, !PT ; /* high word of strided address #2 */
/*6d40*/ LD.E.64 R46, [R34.64+0x8] ; /* c1 = contiguous address + 0x8 */
/*6d50*/ IADD3 R36, P0, R54, R8, RZ ; /* low word of strided address #3 */
/*6d60*/ LD.E.64 R52, [R52.64] ; /* s1 = load at strided address #1 (result overwrites the address registers) */
/*6d70*/ IMAD.X R37, R55, 0x1, R6, P0 ; /* high word of strided address #3 */
/*6d80*/ LD.E.64 R48, [R34.64+0x10] ; /* c2 = contiguous address + 0x10 */
/*6d90*/ LD.E.64 R54, [R54.64] ; /* s2 = load at strided address #2 (result overwrites the address registers) */
/*6da0*/ LD.E.64 R40, [R34.64+0x18] ; /* c3 = contiguous address + 0x18 */
/*6db0*/ LD.E.64 R42, [R36.64] ; /* s3 = load at strided address #3 */
/*6dc0*/ IADD3 R20, P0, R20, 0x4, RZ ; /* 64-bit element counter (R20 low, R32 high) += 4 */
/*6dd0*/ IMAD.X R32, RZ, RZ, R32, P0 ;
/*6de0*/ ISETP.GE.U32.AND P0, PT, R20, R59, PT ; /* P0 = counter >= bound held in R59 (low) / R61 (high), unsigned 64-bit compare completed by the .EX instruction below */
/*6df0*/ ISETP.GE.U32.AND.EX P0, PT, R32, R61, PT, P0 ;
/*6e00*/ DFMA R44, R44, R50, R16 ; /* t = s0 * c0 + running sum (R16:R17) */
/*6e10*/ DFMA R44, R52, R46, R44 ; /* t = s1 * c1 + t */
/*6e20*/ IADD3 R50, P2, R34, 0x20, RZ ; /* contiguous address for next trip = current + 0x20 bytes (four 8-byte elements) */
/*6e30*/ IADD3 R53, P3, R36, R8, RZ ; /* strided address for next trip = strided address #3 + stride (low word) */
/*6e40*/ DFMA R44, R54, R48, R44 ; /* t = s2 * c2 + t */
/*6e50*/ IADD3.X R51, RZ, R35, RZ, P2, !PT ; /* high word of next contiguous address */
/*6e60*/ IMAD.X R54, R37, 0x1, R6, P3 ; /* high word of next strided address */
/*6e70*/ DFMA R16, R42, R40, R44 ; /* running sum R16:R17 = s3 * c3 + t */
/*6e80*/ @!P0 BRA `(.L_x_734) ; /* repeat while counter < bound */
.L_x_733:
/*6e90*/ BSYNC B1 ;
.L_x_732:
/*6ea0*/ IADD3 R18, P3, R57.reuse, -R20.reuse, RZ ;
/*6eb0*/ ISETP.GT.U32.AND P2, PT, R57, R20, PT ;
/*6ec0*/ ISETP.LE.U32.AND P0, PT, R18, 0x1, PT ;
/*6ed0*/ IMAD.X R18, R39.reuse, 0x1, ~R32, P3 ;
/*6ee0*/ ISETP.GT.U32.AND.EX P2, PT, R39, R32, PT, P2 ;
/*6ef0*/ ISETP.LE.U32.OR.EX P0, PT, R18, RZ, !P2, P0 ;
/*6f00*/ @!P0 PLOP3.LUT P1, PT, PT, PT, PT, 0x8, 0x0 ;
/*6f10*/ @!P0 IMAD.MOV.U32 R34, RZ, RZ, R53 ;
/*6f20*/ @!P0 IADD3 R46, P2, R53, R8.reuse, RZ ;
/*6f30*/ @!P0 IMAD.MOV.U32 R35, RZ, RZ, R54 ;
/*6f40*/ @!P0 IADD3 R20, P3, R20, 0x2, RZ ;
/*6f50*/ @!P0 IMAD.MOV.U32 R48, RZ, RZ, R50 ;
/*6f60*/ @!P0 IMAD.MOV.U32 R49, RZ, RZ, R51 ;
/*6f70*/ @!P0 LD.E.64 R40, [R34.64] ;
/*6f80*/ @!P0 IMAD.X R47, R54, 0x1, R6, P2 ;
/*6f90*/ ISETP.LT.U32.AND P2, PT, R20, R57, PT ;
/*6fa0*/ @!P0 IMAD.X R32, RZ, RZ, R32, P3 ;
/*6fb0*/ @!P0 LD.E.64 R42, [R48.64] ;
/*6fc0*/ ISETP.LT.U32.OR.EX P1, PT, R32, R39, P1, P2 ;
/*6fd0*/ @!P0 LD.E.64 R44, [R48.64+0x8] ;
/*6fe0*/ @!P0 IADD3 R53, P2, R46, R8, RZ ;
/*6ff0*/ @!P0 LD.E.64 R38, [R46.64] ;
/*7000*/ @!P0 IMAD.X R54, R47, 0x1, R6, P2 ;
/*7010*/ @!P0 IADD3 R50, P2, R50, 0x10, RZ ;
/*7020*/ IMAD.MOV.U32 R34, RZ, RZ, R53 ;
/*7030*/ @!P0 IMAD.X R51, RZ, RZ, R51, P2 ;
/*7040*/ IMAD.MOV.U32 R35, RZ, RZ, R54 ;
/*7050*/ IMAD.MOV.U32 R36, RZ, RZ, R50 ;
/*7060*/ IMAD.MOV.U32 R37, RZ, RZ, R51 ;
/*7070*/ @P1 LD.E.64 R34, [R34.64] ;
/*7080*/ @P1 LD.E.64 R36, [R36.64] ;
/*7090*/ IADD3 R7, P2, R2, R7, RZ ;
/*70a0*/ IMAD.X R18, R13, 0x1, R9, P2 ;
/*70b0*/ ISETP.LT.U32.AND P2, PT, R7, R0, PT ;
/*70c0*/ ISETP.LT.U32.AND.EX P2, PT, R18, R3, PT, P2 ;
/*70d0*/ SEL R7, R7, R0, P2 ;
/*70e0*/ SEL R9, R18, R3, P2 ;
/*70f0*/ IADD3 R57, P3, R7, 0x1, RZ ;
/*7100*/ IADD3 R11, P2, R11, R4, RZ ;
/*7110*/ IMAD.X R10, R10, 0x1, R19, P2 ;
/*7120*/ @!P0 DFMA R40, R40, R42, R16 ;
/*7130*/ @!P0 DFMA R16, R38, R44, R40 ;
/*7140*/ IADD3 R33, P0, R2, R33, RZ ;
/*7150*/ IMAD.X R39, RZ, RZ, R9, P3 ;
/*7160*/ IMAD.X R12, R13, 0x1, R12, P0 ;
/*7170*/ ISETP.GE.U32.AND P0, PT, R33, R57, PT ;
/*7180*/ ISETP.GE.U32.AND.EX P0, PT, R12, R39, PT, P0 ;
/*7190*/ LEA R15, P3, R2, R15, 0x3 ;
/*71a0*/ IMAD.X R21, R21, 0x1, R14, P3 ;
/*71b0*/ @P1 DFMA R16, R34, R36, R16 ;
/*71c0*/ @!P0 BRA `(.L_x_735) ;
.L_x_731:
/*71d0*/ BSYNC B0 ;
.L_x_730:
/*71e0*/ ISETP.NE.AND P1, PT, R2, 0x1, PT ; /* Start of the cross-lane reduction of the double in R16:R17. P1 = (R2 != 1); R2 is presumably the number of participating threads - TODO confirm against the code before this chunk */
/*71f0*/ PLOP3.LUT P0, PT, PT, PT, PT, 0x8, 0x0 ; /* NOTE(review): LUT 0x8 with all-PT inputs appears to clear P0, so the @!P0 code at .L_x_736 runs on the R2 == 1 shortcut - confirm */
/*7200*/ @!P1 BRA `(.L_x_736) ; /* R2 == 1: skip the reduction entirely */
/*7210*/ IADD3 R32, R2.reuse, 0x1f, RZ ; /* R32 = R2 + 31 */
/*7220*/ BSSY B6, `(.L_x_737) ; /* set up convergence barrier B6 with target .L_x_737 (matched by the BSYNC B6 at .L_x_740) */
/*7230*/ LOP3.LUT P0, R18, R2, 0x1f, RZ, 0xc0, !PT ; /* R18 = R2 & 31; P0 presumably set when that remainder is non-zero - confirm */
/*7240*/ SHF.R.U32.HI R35, RZ, 0x5, R32 ; /* R35 = (R2 + 31) >> 5, i.e. R2 / 32 rounded up */
/*7250*/ IADD3 R3, R35, -0x1, RZ ; /* R3 = R35 - 1 */
/*7260*/ ISETP.GE.U32.AND P1, PT, R26, R3, PT ; /* P1 = R26 >= R35 - 1 (R26 presumably the warp index - TODO confirm) */
/*7270*/ @P0 BRA P1, `(.L_x_738) ; /* both P0 and P1 set: take the variable-offset reduction at .L_x_738 instead */
/*7280*/ BRA.DIV ~URZ, `(.L_x_739) ; /* alternate path at .L_x_739 (outside this chunk); presumably taken when the warp is divergent */
/*7290*/ SHFL.DOWN PT, R5, R17, 0x10, 0x1f ; /* fetch high word of the value held 16 lanes down */
/*72a0*/ SHFL.DOWN PT, R4, R16, 0x10, 0x1f ; /* fetch low word of the value held 16 lanes down */
/*72b0*/ DADD R6, R16, R4 ; /* R6:R7 = own value + value from lane+16 */
/*72c0*/ SHFL.DOWN PT, R5, R7, 0x8, 0x1f ; /* same step with offset 8 */
/*72d0*/ SHFL.DOWN PT, R4, R6, 0x8, 0x1f ;
/*72e0*/ DADD R8, R6, R4 ;
/*72f0*/ SHFL.DOWN PT, R5, R9, 0x4, 0x1f ; /* offset 4 */
/*7300*/ SHFL.DOWN PT, R4, R8, 0x4, 0x1f ;
/*7310*/ DADD R10, R8, R4 ;
/*7320*/ SHFL.DOWN PT, R5, R11, 0x2, 0x1f ; /* offset 2 */
/*7330*/ SHFL.DOWN PT, R4, R10, 0x2, 0x1f ;
/*7340*/ DADD R18, R10, R4 ; /* partial sum now in R18:R19 */
/*7350*/ SHFL.DOWN PT, R17, R19, 0x1, 0x1f ; /* offset 1: high word lands in R17 temporarily */
/*7360*/ SHFL.DOWN PT, R4, R18, 0x1, 0x1f ;
.L_x_773:
/*7370*/ MOV R5, R17 ; /* assemble the shuffled double in R4:R5 */
/*7380*/ DADD R16, R4, R18 ; /* R16:R17 = shuffled value + partial sum */
/*7390*/ BRA `(.L_x_740) ; /* jump to the BSYNC B6 at .L_x_740 */
.L_x_738:
/*73a0*/ ISETP.GE.U32.AND P0, PT, R18, 0x2, PT ; /* Shuffle reduction with a shrinking offset; R18 is the current size (R2 & 31 on entry from 7270). Nothing to do when size < 2 */
/*73b0*/ @!P0 BRA `(.L_x_740) ;
/*73c0*/ SHF.R.U32.HI R19, RZ, 0x1, R18 ; /* initial offset R19 = size >> 1 */
.L_x_742:
/*73d0*/ LOP3.LUT R0, R19, 0xffff, RZ, 0xc0, !PT ; /* R0 = offset & 0xffff */
/*73e0*/ ISETP.GE.U32.AND P0, PT, R25, R0, PT ; /* P0 = R25 >= offset (R25 presumably the lane index - TODO confirm) */
/*73f0*/ P2R R34, PR, RZ, 0x1 ; /* save P0 in R34; it is re-tested at .L_x_774 */
/*7400*/ BRA.DIV ~URZ, `(.L_x_741) ; /* alternate path at .L_x_741 (outside this chunk); presumably taken when the warp is divergent */
/*7410*/ SHFL.DOWN PT, R33, R17, R19, 0x1f ; /* fetch high word of the value held R19 lanes down */
/*7420*/ SHFL.DOWN PT, R4, R16, R19, 0x1f ; /* fetch low word */
.L_x_774:
/*7430*/ ISETP.NE.AND P0, PT, R34, RZ, PT ; /* restore the saved predicate */
/*7440*/ IMAD.MOV.U32 R5, RZ, RZ, R33 ; /* assemble the shuffled double in R4:R5 */
/*7450*/ IADD3 R19, R18, 0x1, RZ ; /* R19 = size + 1 (scratch for the two shifts below) */
/*7460*/ FSEL R16, R16, RZ, !P0 ; /* when R25 >= offset, replace the own value by a signed zero (low word 0 here, high word -RZ below) so the DADD yields just the shuffled value */
/*7470*/ FSEL R17, R17, -RZ, !P0 ;
/*7480*/ ISETP.GT.U32.AND P0, PT, R18, 0x2, PT ; /* loop again only if the size before halving was > 2 */
/*7490*/ SHF.R.U32.HI R18, RZ, 0x1, R19.reuse ; /* size = (size + 1) >> 1 */
/*74a0*/ DADD R16, R4, R16 ; /* R16:R17 = shuffled value + (own value or signed zero) */
/*74b0*/ SHF.R.U32.HI R19, RZ, 0x2, R19 ; /* next offset = (old size + 1) >> 2 */
/*74c0*/ @P0 BRA `(.L_x_742) ;
.L_x_740:
/*74d0*/ BSYNC B6 ; /* reconverge on barrier B6 opened by the BSSY at 7220 */
.L_x_737:
/*74e0*/ ISETP.GE.U32.AND P1, PT, R2, 0x21, PT ;
/*74f0*/ BSSY B7, `(.L_x_736) ;
/*7500*/ ISETP.NE.AND P0, PT, R29, RZ, PT ;
/*7510*/ @!P1 BRA `(.L_x_743) ;
/*7520*/ BRA.DIV ~URZ, `(.L_x_744) ;
/*7530*/ BAR.SYNC 0x7, R2 ;
.L_x_775:
/*7540*/ ISETP.NE.AND P0, PT, R31, RZ, PT ;
/*7550*/ BSSY B6, `(.L_x_745) ;
/*7560*/ @!P0 STS [R28+`(__openmp_nvptx_data_transfer_temporary_storage)], R16 ;
/*7570*/ LDS R0, [`(IsSPMDMode)] ;
/*7580*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*7590*/ @P0 BRA `(.L_x_746) ;
/*75a0*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*75b0*/ ISETP.NE.AND P1, PT, R22, UR38, PT ;
/*75c0*/ ISETP.NE.OR P0, PT, R0, RZ, P1 ;
/*75d0*/ @P0 BRA `(.L_x_747) ;
/*75e0*/ @!PT LDS RZ, [RZ] ;
/*75f0*/ @!PT LDS RZ, [RZ] ;
/*7600*/ @!PT LDS RZ, [RZ] ;
/*7610*/ @!PT LDS RZ, [RZ] ;
/*7620*/ MEMBAR.SC.GPU ;
/*7630*/ ERRBAR;
/*7640*/ CCTL.IVALL ;
/*7650*/ BRA `(.L_x_748) ;
.L_x_747:
/*7660*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*7670*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*7680*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*7690*/ SEL R5, R30, R3, !P1 ;
/*76a0*/ SEL R5, R5, 0x1, !P0 ;
/*76b0*/ BRA.DIV ~URZ, `(.L_x_749) ;
/*76c0*/ BAR.SYNC 0x7, R5 ;
/*76d0*/ BRA `(.L_x_748) ;
.L_x_746:
/*76e0*/ WARPSYNC 0xffffffff ;
/*76f0*/ BAR.SYNC 0x0 ;
.L_x_748:
/*7700*/ BSYNC B6 ;
.L_x_745:
/*7710*/ LDS R0, [`(IsSPMDMode)] ;
/*7720*/ ISETP.GE.U32.AND P1, PT, R22, R35, PT ;
/*7730*/ P2R R2, PR, RZ, 0x2 ;
/*7740*/ @!P1 LDS R3, [R27+`(__openmp_nvptx_data_transfer_temporary_storage)] ;
/*7750*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*7760*/ @P0 WARPSYNC 0xffffffff ;
/*7770*/ @!P1 IMAD.MOV.U32 R16, RZ, RZ, R3 ;
/*7780*/ @P0 BAR.SYNC 0x0 ;
/*7790*/ @P0 BRA `(.L_x_750) ;
/*77a0*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*77b0*/ ISETP.EQ.AND P1, PT, R22, UR38, PT ;
/*77c0*/ BSSY B6, `(.L_x_750) ;
/*77d0*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*77e0*/ @!P0 BRA P1, `(.L_x_751) ;
/*77f0*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*7800*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*7810*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*7820*/ SEL R5, R30, R3, !P1 ;
/*7830*/ SEL R5, R5, 0x1, !P0 ;
/*7840*/ BRA.DIV ~URZ, `(.L_x_752) ;
/*7850*/ BAR.SYNC 0x7, R5 ;
/*7860*/ BRA `(.L_x_753) ;
.L_x_751:
/*7870*/ @!PT LDS RZ, [RZ] ;
/*7880*/ @!PT LDS RZ, [RZ] ;
/*7890*/ @!PT LDS RZ, [RZ] ;
/*78a0*/ @!PT LDS RZ, [RZ] ;
/*78b0*/ MEMBAR.SC.GPU ;
/*78c0*/ ERRBAR;
/*78d0*/ CCTL.IVALL ;
.L_x_753:
/*78e0*/ BSYNC B6 ;
.L_x_750:
/*78f0*/ ISETP.NE.AND P0, PT, R31, RZ, PT ;
/*7900*/ @!P0 STS [R28+`(__openmp_nvptx_data_transfer_temporary_storage)], R17 ;
/*7910*/ LDS R0, [`(IsSPMDMode)] ;
/*7920*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*7930*/ @P0 WARPSYNC 0xffffffff ;
/*7940*/ @P0 BAR.SYNC 0x0 ;
/*7950*/ @P0 BRA `(.L_x_754) ;
/*7960*/ LDS R0, [`(_ZN4ompx5state9TeamStateE_$_0)] ;
/*7970*/ ISETP.EQ.AND P1, PT, R22, UR38, PT ;
/*7980*/ BSSY B6, `(.L_x_754) ;
/*7990*/ ISETP.NE.AND P0, PT, R0, RZ, PT ;
/*79a0*/ @!P0 BRA P1, `(.L_x_755) ;
/*79b0*/ LDS R3, [`(_ZN4ompx5state9TeamStateE_$_2)] ;
/*79c0*/ ISETP.NE.AND P0, PT, R0, 0x1, PT ;
/*79d0*/ ISETP.NE.AND P1, PT, R3, RZ, PT ;
/*79e0*/ SEL R5, R30, R3, !P1 ;
/*79f0*/ SEL R5, R5, 0x1, !P0 ;
/*7a00*/ BRA.DIV ~URZ, `(.L_x_756) ;
/*7a10*/ BAR.SYNC 0x7, R5 ;
/*7a20*/ BRA `(.L_x_757) ;
.L_x_755:
/*7a30*/ @!PT LDS RZ, [RZ] ;
/*7a40*/ @!PT LDS RZ, [RZ] ;
/*7a50*/ @!PT LDS RZ, [RZ] ;
/*7a60*/ @!PT LDS RZ, [RZ] ;
/*7a70*/ MEMBAR.SC.GPU ;
/*7a80*/ ERRBAR;
/*7a90*/ CCTL.IVALL ;
.L_x_757:
/*7aa0*/ BSYNC B6 ;
.L_x_754:
/*7ab0*/ ISETP.NE.AND P2, PT, R2, RZ, PT ;
/*7ac0*/ ISETP.GE.U32.AND P1, PT, R32, 0x40, PT ;
/*7ad0*/ ISETP.NE.AND P0, PT, R29, RZ, PT ;
/*7ae0*/ ISETP.GT.U32.OR P1, PT, R22, 0x1f, !P1 ;
/*7af0*/ @!P2 LDS R3, [R27+`(__openmp_nvptx_data_transfer_temporary_storage)] ;
/*7b00*/ @!P2 IMAD.MOV.U32 R17, RZ, RZ, R3 ;
/*7b10*/ @P1 BRA `(.L_x_743) ;
/*7b20*/ BSSY B6, `(.L_x_758) ;
/*7b30*/ SHF.R.U32.HI R32, RZ, 0x6, R32 ;
.L_x_760:
/*7b40*/ LOP3.LUT R0, R32, 0xffff, RZ, 0xc0, !PT ;
/*7b50*/ LOP3.LUT R3, R22, 0xffff, RZ, 0xc0, !PT ;
/*7b60*/ PRMT R33, R32, 0x9910, RZ ;
/*7b70*/ ISETP.GE.U32.AND P0, PT, R3, R0, PT ;
/*7b80*/ P2R R2, PR, RZ, 0x1 ;
/*7b90*/ BRA.DIV ~URZ, `(.L_x_759) ;
/*7ba0*/ SHFL.DOWN PT, R19, R17, R33, 0x1f ;
/*7bb0*/ SHFL.DOWN PT, R4, R16, R33, 0x1f ;
.L_x_776:
/*7bc0*/ ISETP.NE.AND P0, PT, R2, RZ, PT ;
/*7bd0*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
/*7be0*/ IADD3 R32, R35, 0x1, RZ ;
/*7bf0*/ FSEL R16, R16, RZ, !P0 ;
/*7c00*/ FSEL R17, R17, -RZ, !P0 ;
/*7c10*/ ISETP.GT.U32.AND P0, PT, R35, 0x2, PT ;
/*7c20*/ SHF.R.U32.HI R35, RZ, 0x1, R32.reuse ;
/*7c30*/ DADD R16, R4, R16 ;
/*7c40*/ SHF.R.U32.HI R32, RZ, 0x2, R32 ;
/*7c50*/ @P0 BRA `(.L_x_760) ;
/*7c60*/ BSYNC B6 ;
.L_x_758:
/*7c70*/ ISETP.NE.AND P0, PT, R29, RZ, PT ;
.L_x_743:
/*7c80*/ BSYNC B7 ;
.L_x_736:
/*7c90*/ @!P0 LDS.64 R2, [`($__c_ratio_shared8_$_0__2702)] ; /* threads with P0 clear add the reduced double in R16:R17 into the shared 64-bit slot: load the current value */
/*7ca0*/ @!P0 DADD R2, R2, R16 ; /* current value + reduction result */
/*7cb0*/ @!P0 STS.64 [`($__c_ratio_shared8_$_0__2702)], R2 ; /* store the sum back to the same shared slot */
.L_x_729:
/*7cc0*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ;
/*7cd0*/ BRA `(.L_x_727) ;
.L_x_728:
/*7ce0*/ MOV R2, 0x1 ;
/*7cf0*/ STS [`(_ZN4ompx5state9TeamStateE_$_2)], R0 ;
/*7d00*/ STS [`(_ZN4ompx5state9TeamStateE_$_1)], R2 ;
/*7d10*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], R2 ;
/*7d20*/ BAR.SYNC 0x8 ;
/*7d30*/ BAR.SYNC 0x8 ;
/*7d40*/ STS [`(_ZN4ompx5state9TeamStateE_$_0)], RZ ;
/*7d50*/ STS [`(_ZN4ompx5state9TeamStateE_$_1)], RZ ;
/*7d60*/ STS [`(_ZN4ompx5state9TeamStateE_$_2)], R2 ;
.L_x_727:
/*7d70*/ ISETP.NE.AND P0, PT, R22, RZ, PT ; /* Write-out stage: the thread with R22 == 0 stores the shared value and then its reciprocal. P0 = (R22 != 0); R22 presumably the thread index - TODO confirm */
/*7d80*/ WARPSYNC 0xffffffff ;
/*7d90*/ P2R R29, PR, RZ, 0x1 ; /* keep P0 in R29 (R29 is tested against zero at 7500 and 7c70) */
/*7da0*/ @!P0 LDS R2, [`($__iw_shared5_$_0__2688)] ; /* R2 = 32-bit index from the shared slot */
/*7db0*/ @!P0 MOV R3, 0x8 ; /* element size in bytes */
/*7dc0*/ @!P0 LDS.64 R4, [`($__c_ratio_shared8_$_0__2702)] ; /* R4:R5 = shared double */
/*7dd0*/ @!P0 IMAD.WIDE.U32 R2, R2, R3, c[0x0][0x1b0] ; /* R2:R3 = 64-bit value at constant offset 0x1b0 + index * 8 (presumably a kernel pointer argument - confirm) */
/*7de0*/ @!P0 STG.E.64 [R2.64], R4 ; /* store the shared double to that global address */
/*7df0*/ BAR.SYNC 0x0 ;
/*7e00*/ @P0 BRA `(.L_x_761) ; /* threads with R22 != 0 skip the reciprocal */
/*7e10*/ LDS.64 R4, [`($__c_ratio_shared8_$_0__2702)] ; /* x = shared double, re-read after the barrier */
/*7e20*/ MOV R17, 0x8 ;
/*7e30*/ LDS R16, [`($__iw_shared5_$_0__2688)] ; /* index, re-read */
/*7e40*/ MUFU.RCP64H R3, R5 ; /* high word of an approximate reciprocal, computed from the high word of x */
/*7e50*/ IADD3 R2, R5, 0x300402, RZ ; /* R2 = high word of x + 0x300402; used by the FSETP range test below and as the low word of the seed R2:R3 */
/*7e60*/ IMAD.WIDE.U32 R16, R16, R17, c[0x0][0x1b8] ; /* R16:R17 = 64-bit value at constant offset 0x1b8 + index * 8: destination of the reciprocal */
/*7e70*/ FSETP.GEU.AND P0, PT, |R2|, 5.8789094863358348022e-39, PT ; /* P0 selects the inline result; when it is false the helper call below is used instead */
/*7e80*/ DFMA R6, -R4, R2, 1 ; /* e = 1 - x * seed */
/*7e90*/ DFMA R6, R6, R6, R6 ; /* e = e * e + e */
/*7ea0*/ DFMA R6, R2, R6, R2 ; /* r = seed * e + seed */
/*7eb0*/ DFMA R8, -R4, R6, 1 ; /* e2 = 1 - x * r */
/*7ec0*/ DFMA R2, R6, R8, R6 ; /* R2:R3 = r * e2 + r (refined reciprocal) */
/*7ed0*/ @P0 BRA `(.L_x_762) ; /* inline result accepted: go straight to the store */
/*7ee0*/ LOP3.LUT R6, R5, 0x7fffffff, RZ, 0xc0, !PT ; /* R6 = high word of x with the sign bit cleared */
/*7ef0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_75@srel)) ; /* R20:R21 = address of .L_x_75, presumably the return address for the helper - confirm */
/*7f00*/ IADD3 R6, R6, -0x100000, RZ ; /* R6 -= 0x100000 */
/*7f10*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_75@srel)) ;
/*7f20*/ CALL.ABS.NOINC `(__cuda_sm20_dblrcp_rn_slowpath_v3) ; /* slow-path double reciprocal helper */
.L_x_75:
/*7f30*/ MOV R2, R4 ; /* copy R4:R5 into R2:R3 for the store below (presumably the helper's result - confirm) */
/*7f40*/ MOV R3, R5 ;
.L_x_762:
/*7f50*/ STG.E.64 [R16.64], R2 ; /* store the reciprocal R2:R3 to the global address computed at 7e60 */
.L_x_761:
/*7f60*/ IADD3 R23, R23, 0x1, RZ ; /* R23 += 1 (presumably the outer-loop counter; the loop head is outside this chunk) */
/*7f70*/ WARPSYNC 0xffffffff ;
/*7f80*/ BAR.SYNC 0x0 ;
/*7f90*/ ISETP.GT.U32.AND P0, PT, R23, R24, PT ; /* P0 = R23 > R24 (unsigned) */
/*7fa0*/ @P0 EXIT ; /* thread exits once R23 exceeds R24 */
/*7fb0*/ BRA `(.L_x_763) ; /* otherwise branch to .L_x_763 (outside this chunk) */
.L_x_621:
/*7fc0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*7fd0*/ MOV R6, 0x1f ;
/*7fe0*/ IMAD.MOV.U32 R5, RZ, RZ, 0x10 ;
/*7ff0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_76@srel)) ;
/*8000*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*8010*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_76@srel)) ;
/*8020*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_76:
/*8030*/ IMAD.MOV.U32 R37, RZ, RZ, R4 ;
/*8040*/ MOV R5, 0x10 ;
/*8050*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8060*/ MOV R4, R18 ;
/*8070*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*8080*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_77@srel)) ;
/*8090*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_77@srel)) ;
/*80a0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_77:
/*80b0*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*80c0*/ MOV R5, 0x10 ;
/*80d0*/ IMAD.MOV.U32 R39, RZ, RZ, R37 ;
/*80e0*/ MOV R7, 0xffffffff ;
/*80f0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8100*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_78@srel)) ;
/*8110*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*8120*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_78@srel)) ;
/*8130*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_78:
/*8140*/ IMAD.MOV.U32 R37, RZ, RZ, R4 ;
/*8150*/ MOV R5, 0x10 ;
/*8160*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8170*/ MOV R4, R16 ;
/*8180*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*8190*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_79@srel)) ;
/*81a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_79@srel)) ;
/*81b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_79:
/*81c0*/ IMAD.MOV.U32 R36, RZ, RZ, R4 ;
/*81d0*/ MOV R5, 0x10 ;
/*81e0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*81f0*/ MOV R7, 0xffffffff ;
/*8200*/ IMAD.MOV.U32 R4, RZ, RZ, R35 ;
/*8210*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_80@srel)) ;
/*8220*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_80@srel)) ;
/*8230*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_80:
/*8240*/ IMAD.MOV.U32 R35, RZ, RZ, R4 ;
/*8250*/ MOV R5, 0x10 ;
/*8260*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8270*/ MOV R4, R34 ;
/*8280*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*8290*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_81@srel)) ;
/*82a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_81@srel)) ;
/*82b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_81:
/*82c0*/ DADD R18, R18, R38 ;
/*82d0*/ IMAD.MOV.U32 R5, RZ, RZ, R35 ;
/*82e0*/ MOV R7, 0xffffffff ;
/*82f0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8300*/ DADD R36, R16, R36 ;
/*8310*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_82@srel)) ;
/*8320*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_82@srel)) ;
/*8330*/ DADD R34, R4, R40 ;
/*8340*/ MOV R5, 0x8 ;
/*8350*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*8360*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_82:
/*8370*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8380*/ MOV R5, 0x8 ;
/*8390*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*83a0*/ MOV R7, 0xffffffff ;
/*83b0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*83c0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_83@srel)) ;
/*83d0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_83@srel)) ;
/*83e0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_83:
/*83f0*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*8400*/ MOV R5, 0x8 ;
/*8410*/ IMAD.MOV.U32 R39, RZ, RZ, R17 ;
/*8420*/ MOV R7, 0xffffffff ;
/*8430*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8440*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_84@srel)) ;
/*8450*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*8460*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_84@srel)) ;
/*8470*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_84:
/*8480*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8490*/ MOV R5, 0x8 ;
/*84a0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*84b0*/ MOV R7, 0xffffffff ;
/*84c0*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*84d0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_85@srel)) ;
/*84e0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_85@srel)) ;
/*84f0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_85:
/*8500*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ;
/*8510*/ MOV R5, 0x8 ;
/*8520*/ IMAD.MOV.U32 R41, RZ, RZ, R17 ;
/*8530*/ MOV R7, 0xffffffff ;
/*8540*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8550*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_86@srel)) ;
/*8560*/ IMAD.MOV.U32 R4, RZ, RZ, R35 ;
/*8570*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_86@srel)) ;
/*8580*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_86:
/*8590*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*85a0*/ MOV R5, 0x8 ;
/*85b0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*85c0*/ MOV R7, 0xffffffff ;
/*85d0*/ IMAD.MOV.U32 R4, RZ, RZ, R34 ;
/*85e0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_87@srel)) ;
/*85f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_87@srel)) ;
/*8600*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_87:
/*8610*/ DADD R18, R18, R38 ;
/*8620*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*8630*/ MOV R7, 0xffffffff ;
/*8640*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8650*/ DADD R36, R36, R40 ;
/*8660*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_88@srel)) ;
/*8670*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_88@srel)) ;
/*8680*/ DADD R34, R34, R4 ;
/*8690*/ MOV R5, 0x4 ;
/*86a0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*86b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_88:
/*86c0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*86d0*/ MOV R5, 0x4 ;
/*86e0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*86f0*/ MOV R7, 0xffffffff ;
/*8700*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*8710*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_89@srel)) ;
/*8720*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_89@srel)) ;
/*8730*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_89:
/*8740*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*8750*/ MOV R5, 0x4 ;
/*8760*/ IMAD.MOV.U32 R39, RZ, RZ, R17 ;
/*8770*/ MOV R7, 0xffffffff ;
/*8780*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8790*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_90@srel)) ;
/*87a0*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*87b0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_90@srel)) ;
/*87c0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_90:
/*87d0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*87e0*/ MOV R5, 0x4 ;
/*87f0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8800*/ MOV R7, 0xffffffff ;
/*8810*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*8820*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_91@srel)) ;
/*8830*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_91@srel)) ;
/*8840*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_91:
/*8850*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ;
/*8860*/ MOV R5, 0x4 ;
/*8870*/ IMAD.MOV.U32 R41, RZ, RZ, R17 ;
/*8880*/ MOV R7, 0xffffffff ;
/*8890*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*88a0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_92@srel)) ;
/*88b0*/ IMAD.MOV.U32 R4, RZ, RZ, R35 ;
/*88c0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_92@srel)) ;
/*88d0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_92:
/*88e0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*88f0*/ MOV R5, 0x4 ;
/*8900*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8910*/ MOV R7, 0xffffffff ;
/*8920*/ IMAD.MOV.U32 R4, RZ, RZ, R34 ;
/*8930*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_93@srel)) ;
/*8940*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_93@srel)) ;
/*8950*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_93:
/*8960*/ DADD R18, R18, R38 ;
/*8970*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*8980*/ MOV R7, 0xffffffff ;
/*8990*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*89a0*/ DADD R36, R36, R40 ;
/*89b0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_94@srel)) ;
/*89c0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_94@srel)) ;
/*89d0*/ DADD R38, R34, R4 ;
/*89e0*/ MOV R5, 0x2 ;
/*89f0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*8a00*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_94:
/*8a10*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8a20*/ MOV R5, 0x2 ;
/*8a30*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8a40*/ MOV R7, 0xffffffff ;
/*8a50*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*8a60*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_95@srel)) ;
/*8a70*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_95@srel)) ;
/*8a80*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_95:
/*8a90*/ IMAD.MOV.U32 R34, RZ, RZ, R4 ;
/*8aa0*/ MOV R5, 0x2 ;
/*8ab0*/ IMAD.MOV.U32 R35, RZ, RZ, R17 ;
/*8ac0*/ MOV R7, 0xffffffff ;
/*8ad0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8ae0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_96@srel)) ;
/*8af0*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*8b00*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_96@srel)) ;
/*8b10*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_96:
/*8b20*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8b30*/ MOV R5, 0x2 ;
/*8b40*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8b50*/ MOV R7, 0xffffffff ;
/*8b60*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*8b70*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_97@srel)) ;
/*8b80*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_97@srel)) ;
/*8b90*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_97:
/*8ba0*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ;
/*8bb0*/ MOV R5, 0x2 ;
/*8bc0*/ IMAD.MOV.U32 R41, RZ, RZ, R17 ;
/*8bd0*/ MOV R7, 0xffffffff ;
/*8be0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8bf0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_98@srel)) ;
/*8c00*/ IMAD.MOV.U32 R4, RZ, RZ, R39 ;
/*8c10*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_98@srel)) ;
/*8c20*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_98:
/*8c30*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8c40*/ MOV R5, 0x2 ;
/*8c50*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8c60*/ MOV R7, 0xffffffff ;
/*8c70*/ IMAD.MOV.U32 R4, RZ, RZ, R38 ;
/*8c80*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_99@srel)) ;
/*8c90*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_99@srel)) ;
/*8ca0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_99:
/*8cb0*/ DADD R18, R18, R34 ;
/*8cc0*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*8cd0*/ MOV R7, 0xffffffff ;
/*8ce0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8cf0*/ DADD R34, R36, R40 ;
/*8d00*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_100@srel)) ;
/*8d10*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_100@srel)) ;
/*8d20*/ DADD R36, R38, R4 ;
/*8d30*/ MOV R5, 0x1 ;
/*8d40*/ IMAD.MOV.U32 R4, RZ, RZ, R19.reuse ;
/*8d50*/ MOV R44, R18 ;
/*8d60*/ IMAD.MOV.U32 R45, RZ, RZ, R19 ;
/*8d70*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_100:
/*8d80*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*8d90*/ MOV R5, 0x1 ;
/*8da0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8db0*/ MOV R4, R18 ;
/*8dc0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*8dd0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_101@srel)) ;
/*8de0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_101@srel)) ;
/*8df0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_101:
/*8e00*/ IMAD.MOV.U32 R16, RZ, RZ, R4 ;
/*8e10*/ MOV R5, 0x1 ;
/*8e20*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8e30*/ MOV R7, 0xffffffff ;
/*8e40*/ IMAD.MOV.U32 R4, RZ, RZ, R35 ;
/*8e50*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_102@srel)) ;
/*8e60*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_102@srel)) ;
/*8e70*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_102:
/*8e80*/ IMAD.MOV.U32 R19, RZ, RZ, R4 ;
/*8e90*/ MOV R5, 0x1 ;
/*8ea0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8eb0*/ MOV R7, 0xffffffff ;
/*8ec0*/ IMAD.MOV.U32 R4, RZ, RZ, R34 ;
/*8ed0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_103@srel)) ;
/*8ee0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_103@srel)) ;
/*8ef0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_103:
/*8f00*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*8f10*/ MOV R5, 0x1 ;
/*8f20*/ IMAD.MOV.U32 R39, RZ, RZ, R19 ;
/*8f30*/ MOV R7, 0xffffffff ;
/*8f40*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8f50*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_104@srel)) ;
/*8f60*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*8f70*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_104@srel)) ;
/*8f80*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_104:
/*8f90*/ IMAD.MOV.U32 R41, RZ, RZ, R4 ;
/*8fa0*/ MOV R5, 0x1 ;
/*8fb0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*8fc0*/ MOV R7, 0xffffffff ;
/*8fd0*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*8fe0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_105@srel)) ;
/*8ff0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_105@srel)) ;
/*9000*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_105:
/*9010*/ BRA `(.L_x_764) ;
.L_x_624:
/*9020*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ; // .L_x_624 path: six consecutive calls to __cuda_sm70_shflsync_down, one per 32-bit input register (R19, R18, R17, R16, R35, R34); first input: R4 = R19
/*9030*/ MOV R6, 0x1f ; // R6 = 0x1f for every call in this path -- NOTE(review): presumably the shuffle clamp/width operand; confirm
/*9040*/ IMAD.MOV.U32 R5, RZ, RZ, R37 ; // R5 = R37 for every call in this path -- NOTE(review): presumably the shuffle-down lane offset; confirm
/*9050*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_106@srel)) ; // R20 = low 32 bits of the address of .L_x_106 (the label directly after the CALL)
/*9060*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ; // R7 = -1, the same 32-bit pattern as the 0xffffffff used below -- NOTE(review): presumably an all-lanes member mask; confirm
/*9070*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_106@srel)) ; // R21 = high 32 bits of the same address
/*9080*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ; // NOTE(review): helper presumably resumes at the address in R20:R21 (.L_x_106) with its result in R4; confirm
.L_x_106:
/*9090*/ IMAD.MOV.U32 R41, RZ, RZ, R4 ; // R41 = R4 after the call that was given R19
/*90a0*/ MOV R5, R37 ; // R5, R6 and R7 are reloaded before each call
/*90b0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*90c0*/ MOV R7, 0xffffffff ;
/*90d0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ; // second input: R4 = R18 -- NOTE(review): R19:R18 look like the two halves of one 64-bit value; confirm
/*90e0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_107@srel)) ; // R20:R21 = address of .L_x_107
/*90f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_107@srel)) ;
/*9100*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_107:
/*9110*/ IMAD.MOV.U32 R40, RZ, RZ, R4 ; // R40 = R4 after the call that was given R18
/*9120*/ MOV R5, R37 ;
/*9130*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9140*/ MOV R7, 0xffffffff ;
/*9150*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ; // third input: R4 = R17
/*9160*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_108@srel)) ; // R20:R21 = address of .L_x_108
/*9170*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_108@srel)) ;
/*9180*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_108:
/*9190*/ IMAD.MOV.U32 R39, RZ, RZ, R4 ; // R39 = R4 after the call that was given R17
/*91a0*/ MOV R5, R37 ;
/*91b0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*91c0*/ MOV R7, 0xffffffff ;
/*91d0*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ; // fourth input: R4 = R16
/*91e0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_109@srel)) ; // R20:R21 = address of .L_x_109
/*91f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_109@srel)) ;
/*9200*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_109:
/*9210*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ; // R38 = R4 after the call that was given R16
/*9220*/ MOV R5, R37 ;
/*9230*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9240*/ MOV R7, 0xffffffff ;
/*9250*/ IMAD.MOV.U32 R4, RZ, RZ, R35 ; // fifth input: R4 = R35
/*9260*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_110@srel)) ; // R20:R21 = address of .L_x_110
/*9270*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_110@srel)) ;
/*9280*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_110:
/*9290*/ IMAD.MOV.U32 R36, RZ, RZ, R4 ; // R36 = R4 after the call that was given R35
/*92a0*/ MOV R5, R37 ;
/*92b0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*92c0*/ MOV R7, 0xffffffff ;
/*92d0*/ IMAD.MOV.U32 R4, RZ, RZ, R34 ; // sixth input: R4 = R34
/*92e0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_111@srel)) ; // R20:R21 = address of .L_x_111
/*92f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_111@srel)) ;
/*9300*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_111:
/*9310*/ MOV R2, R4 ; // R2 = R4 after the call that was given R34 (outputs of this path: R41, R40, R39, R38, R36, R2)
/*9320*/ BRA `(.L_x_765) ; // leave this path; .L_x_765 is defined outside this excerpt
.L_x_633:
/*9330*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ; // .L_x_633 path: one call to __cuda_sm70_barrier_sync_count with R4 = 7 -- NOTE(review): presumably the barrier id; any other operand is set outside this excerpt; confirm
/*9340*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_112@srel)) ; // R20 = low 32 bits of the address of .L_x_112 (the label directly after the CALL)
/*9350*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_112@srel)) ; // R21 = high 32 bits of the same address
/*9360*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ; // NOTE(review): helper presumably resumes at the address in R20:R21 (.L_x_112); confirm
.L_x_112:
/*9370*/ BRA `(.L_x_632) ; // leave this path; .L_x_632 is defined outside this excerpt
.L_x_640:
/*9380*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ; // .L_x_640 path: one call to __cuda_sm70_barrier_sync_count with R4 = 7 -- NOTE(review): presumably the barrier id; any other operand is set outside this excerpt; confirm
/*9390*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_113@srel)) ; // R20 = low 32 bits of the address of .L_x_113 (the label directly after the CALL)
/*93a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_113@srel)) ; // R21 = high 32 bits of the same address
/*93b0*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ; // NOTE(review): helper presumably resumes at the address in R20:R21 (.L_x_113); confirm
.L_x_113:
/*93c0*/ BRA `(.L_x_639) ; // leave this path; .L_x_639 is defined outside this excerpt
.L_x_643:
/*93d0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ; // .L_x_643 path: six consecutive calls to __cuda_sm70_shflsync_down, one per 32-bit input register (R19, R18, R33, R32, R17, R16); first input: R4 = R19
/*93e0*/ MOV R6, 0x1f ; // R6 = 0x1f for every call in this path -- NOTE(review): presumably the shuffle clamp/width operand; confirm
/*93f0*/ IMAD.MOV.U32 R5, RZ, RZ, R39 ; // R5 = R39 for every call in this path -- NOTE(review): presumably the shuffle-down lane offset; confirm
/*9400*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_114@srel)) ; // R20 = low 32 bits of the address of .L_x_114 (the label directly after the CALL)
/*9410*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ; // R7 = -1, the same 32-bit pattern as the 0xffffffff used below -- NOTE(review): presumably an all-lanes member mask; confirm
/*9420*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_114@srel)) ; // R21 = high 32 bits of the same address
/*9430*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ; // NOTE(review): helper presumably resumes at the address in R20:R21 (.L_x_114) with its result in R4; confirm
.L_x_114:
/*9440*/ IMAD.MOV.U32 R37, RZ, RZ, R4 ; // R37 = R4 after the call that was given R19
/*9450*/ MOV R5, R39 ; // R5, R6 and R7 are reloaded before each call
/*9460*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9470*/ MOV R7, 0xffffffff ;
/*9480*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ; // second input: R4 = R18 -- NOTE(review): R19:R18 look like the two halves of one 64-bit value; confirm
/*9490*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_115@srel)) ; // R20:R21 = address of .L_x_115
/*94a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_115@srel)) ;
/*94b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_115:
/*94c0*/ IMAD.MOV.U32 R36, RZ, RZ, R4 ; // R36 = R4 after the call that was given R18
/*94d0*/ MOV R5, R39 ;
/*94e0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*94f0*/ MOV R7, 0xffffffff ;
/*9500*/ IMAD.MOV.U32 R4, RZ, RZ, R33 ; // third input: R4 = R33
/*9510*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_116@srel)) ; // R20:R21 = address of .L_x_116
/*9520*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_116@srel)) ;
/*9530*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_116:
/*9540*/ IMAD.MOV.U32 R35, RZ, RZ, R4 ; // R35 = R4 after the call that was given R33
/*9550*/ MOV R5, R39 ;
/*9560*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9570*/ MOV R7, 0xffffffff ;
/*9580*/ IMAD.MOV.U32 R4, RZ, RZ, R32 ; // fourth input: R4 = R32
/*9590*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_117@srel)) ; // R20:R21 = address of .L_x_117
/*95a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_117@srel)) ;
/*95b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_117:
/*95c0*/ IMAD.MOV.U32 R34, RZ, RZ, R4 ; // R34 = R4 after the call that was given R32
/*95d0*/ MOV R5, R39 ;
/*95e0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*95f0*/ MOV R7, 0xffffffff ;
/*9600*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ; // fifth input: R4 = R17
/*9610*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_118@srel)) ; // R20:R21 = address of .L_x_118
/*9620*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_118@srel)) ;
/*9630*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_118:
/*9640*/ IMAD.MOV.U32 R30, RZ, RZ, R4 ; // R30 = R4 after the call that was given R17
/*9650*/ MOV R5, R39 ;
/*9660*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9670*/ MOV R7, 0xffffffff ;
/*9680*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ; // sixth input: R4 = R16
/*9690*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_119@srel)) ; // R20:R21 = address of .L_x_119
/*96a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_119@srel)) ;
/*96b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_119:
/*96c0*/ BRA `(.L_x_766) ; // R4 is not copied after the sixth call (outputs of this path: R37, R36, R35, R34, R30, plus whatever the call left in R4); .L_x_766 is defined outside this excerpt
.L_x_661:
/*96d0*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*96e0*/ MOV R6, 0x1f ;
/*96f0*/ IMAD.MOV.U32 R5, RZ, RZ, 0x10 ;
/*9700*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_120@srel)) ;
/*9710*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*9720*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_120@srel)) ;
/*9730*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_120:
/*9740*/ IMAD.MOV.U32 R29, RZ, RZ, R4 ;
/*9750*/ MOV R5, 0x10 ;
/*9760*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9770*/ MOV R4, R16 ;
/*9780*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*9790*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_121@srel)) ;
/*97a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_121@srel)) ;
/*97b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_121:
/*97c0*/ IMAD.MOV.U32 R36, RZ, RZ, R4 ;
/*97d0*/ MOV R5, 0x10 ;
/*97e0*/ IMAD.MOV.U32 R37, RZ, RZ, R29 ;
/*97f0*/ MOV R7, 0xffffffff ;
/*9800*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9810*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_122@srel)) ;
/*9820*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*9830*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_122@srel)) ;
/*9840*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_122:
/*9850*/ IMAD.MOV.U32 R29, RZ, RZ, R4 ;
/*9860*/ MOV R5, 0x10 ;
/*9870*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9880*/ MOV R4, R18 ;
/*9890*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*98a0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_123@srel)) ;
/*98b0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_123@srel)) ;
/*98c0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_123:
/*98d0*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*98e0*/ MOV R5, 0x10 ;
/*98f0*/ IMAD.MOV.U32 R39, RZ, RZ, R29 ;
/*9900*/ MOV R7, 0xffffffff ;
/*9910*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9920*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_124@srel)) ;
/*9930*/ IMAD.MOV.U32 R4, RZ, RZ, R35 ;
/*9940*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_124@srel)) ;
/*9950*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_124:
/*9960*/ IMAD.MOV.U32 R29, RZ, RZ, R4 ;
/*9970*/ MOV R5, 0x10 ;
/*9980*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9990*/ MOV R4, R34 ;
/*99a0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*99b0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_125@srel)) ;
/*99c0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_125@srel)) ;
/*99d0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_125:
/*99e0*/ DADD R36, R16, R36 ;
/*99f0*/ IMAD.MOV.U32 R5, RZ, RZ, R29 ;
/*9a00*/ MOV R7, 0xffffffff ;
/*9a10*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9a20*/ DADD R18, R18, R38 ;
/*9a30*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_126@srel)) ;
/*9a40*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_126@srel)) ;
/*9a50*/ DADD R34, R4, R34 ;
/*9a60*/ MOV R5, 0x8 ;
/*9a70*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*9a80*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_126:
/*9a90*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*9aa0*/ MOV R5, 0x8 ;
/*9ab0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9ac0*/ MOV R7, 0xffffffff ;
/*9ad0*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*9ae0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_127@srel)) ;
/*9af0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_127@srel)) ;
/*9b00*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_127:
/*9b10*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*9b20*/ MOV R5, 0x8 ;
/*9b30*/ IMAD.MOV.U32 R39, RZ, RZ, R17 ;
/*9b40*/ MOV R7, 0xffffffff ;
/*9b50*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9b60*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_128@srel)) ;
/*9b70*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*9b80*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_128@srel)) ;
/*9b90*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_128:
/*9ba0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*9bb0*/ MOV R5, 0x8 ;
/*9bc0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9bd0*/ MOV R7, 0xffffffff ;
/*9be0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*9bf0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_129@srel)) ;
/*9c00*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_129@srel)) ;
/*9c10*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_129:
/*9c20*/ IMAD.MOV.U32 R42, RZ, RZ, R4 ;
/*9c30*/ MOV R5, 0x8 ;
/*9c40*/ IMAD.MOV.U32 R43, RZ, RZ, R17 ;
/*9c50*/ MOV R7, 0xffffffff ;
/*9c60*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9c70*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_130@srel)) ;
/*9c80*/ IMAD.MOV.U32 R4, RZ, RZ, R35 ;
/*9c90*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_130@srel)) ;
/*9ca0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_130:
/*9cb0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*9cc0*/ MOV R5, 0x8 ;
/*9cd0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9ce0*/ MOV R7, 0xffffffff ;
/*9cf0*/ IMAD.MOV.U32 R4, RZ, RZ, R34 ;
/*9d00*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_131@srel)) ;
/*9d10*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_131@srel)) ;
/*9d20*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_131:
/*9d30*/ DADD R36, R36, R38 ;
/*9d40*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*9d50*/ MOV R7, 0xffffffff ;
/*9d60*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9d70*/ DADD R18, R18, R42 ;
/*9d80*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_132@srel)) ;
/*9d90*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_132@srel)) ;
/*9da0*/ DADD R34, R34, R4 ;
/*9db0*/ MOV R5, 0x4 ;
/*9dc0*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*9dd0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_132:
/*9de0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*9df0*/ MOV R5, 0x4 ;
/*9e00*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9e10*/ MOV R7, 0xffffffff ;
/*9e20*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*9e30*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_133@srel)) ;
/*9e40*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_133@srel)) ;
/*9e50*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_133:
/*9e60*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*9e70*/ MOV R5, 0x4 ;
/*9e80*/ IMAD.MOV.U32 R39, RZ, RZ, R17 ;
/*9e90*/ MOV R7, 0xffffffff ;
/*9ea0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9eb0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_134@srel)) ;
/*9ec0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*9ed0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_134@srel)) ;
/*9ee0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_134:
/*9ef0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*9f00*/ MOV R5, 0x4 ;
/*9f10*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9f20*/ MOV R7, 0xffffffff ;
/*9f30*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*9f40*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_135@srel)) ;
/*9f50*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_135@srel)) ;
/*9f60*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_135:
/*9f70*/ IMAD.MOV.U32 R42, RZ, RZ, R4 ;
/*9f80*/ MOV R5, 0x4 ;
/*9f90*/ IMAD.MOV.U32 R43, RZ, RZ, R17 ;
/*9fa0*/ MOV R7, 0xffffffff ;
/*9fb0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*9fc0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_136@srel)) ;
/*9fd0*/ IMAD.MOV.U32 R4, RZ, RZ, R35 ;
/*9fe0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_136@srel)) ;
/*9ff0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_136:
/*a000*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a010*/ MOV R5, 0x4 ;
/*a020*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a030*/ MOV R7, 0xffffffff ;
/*a040*/ IMAD.MOV.U32 R4, RZ, RZ, R34 ;
/*a050*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_137@srel)) ;
/*a060*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_137@srel)) ;
/*a070*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_137:
/*a080*/ DADD R36, R36, R38 ;
/*a090*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*a0a0*/ MOV R7, 0xffffffff ;
/*a0b0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a0c0*/ DADD R18, R18, R42 ;
/*a0d0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_138@srel)) ;
/*a0e0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_138@srel)) ;
/*a0f0*/ DADD R34, R34, R4 ;
/*a100*/ MOV R5, 0x2 ;
/*a110*/ IMAD.MOV.U32 R4, RZ, RZ, R37 ;
/*a120*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_138:
/*a130*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a140*/ MOV R5, 0x2 ;
/*a150*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a160*/ MOV R7, 0xffffffff ;
/*a170*/ IMAD.MOV.U32 R4, RZ, RZ, R36 ;
/*a180*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_139@srel)) ;
/*a190*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_139@srel)) ;
/*a1a0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_139:
/*a1b0*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*a1c0*/ MOV R5, 0x2 ;
/*a1d0*/ IMAD.MOV.U32 R39, RZ, RZ, R17 ;
/*a1e0*/ MOV R7, 0xffffffff ;
/*a1f0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a200*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_140@srel)) ;
/*a210*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*a220*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_140@srel)) ;
/*a230*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_140:
/*a240*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a250*/ MOV R5, 0x2 ;
/*a260*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a270*/ MOV R7, 0xffffffff ;
/*a280*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*a290*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_141@srel)) ;
/*a2a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_141@srel)) ;
/*a2b0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_141:
/*a2c0*/ IMAD.MOV.U32 R42, RZ, RZ, R4 ;
/*a2d0*/ MOV R5, 0x2 ;
/*a2e0*/ IMAD.MOV.U32 R43, RZ, RZ, R17 ;
/*a2f0*/ MOV R7, 0xffffffff ;
/*a300*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a310*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_142@srel)) ;
/*a320*/ IMAD.MOV.U32 R4, RZ, RZ, R35 ;
/*a330*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_142@srel)) ;
/*a340*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_142:
/*a350*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a360*/ MOV R5, 0x2 ;
/*a370*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a380*/ MOV R7, 0xffffffff ;
/*a390*/ IMAD.MOV.U32 R4, RZ, RZ, R34 ;
/*a3a0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_143@srel)) ;
/*a3b0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_143@srel)) ;
/*a3c0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_143:
/*a3d0*/ DADD R36, R36, R38 ;
/*a3e0*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*a3f0*/ MOV R7, 0xffffffff ;
/*a400*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a410*/ DADD R18, R18, R42 ;
/*a420*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_144@srel)) ;
/*a430*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_144@srel)) ;
/*a440*/ DADD R34, R34, R4 ;
/*a450*/ MOV R5, 0x1 ;
/*a460*/ IMAD.MOV.U32 R4, RZ, RZ, R37.reuse ;
/*a470*/ MOV R38, R36 ;
/*a480*/ IMAD.MOV.U32 R39, RZ, RZ, R37 ;
/*a490*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_144:
/*a4a0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*a4b0*/ MOV R5, 0x1 ;
/*a4c0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a4d0*/ MOV R4, R36 ;
/*a4e0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*a4f0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_145@srel)) ;
/*a500*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_145@srel)) ;
/*a510*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_145:
/*a520*/ IMAD.MOV.U32 R16, RZ, RZ, R4 ;
/*a530*/ MOV R5, 0x1 ;
/*a540*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a550*/ MOV R7, 0xffffffff ;
/*a560*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*a570*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_146@srel)) ;
/*a580*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_146@srel)) ;
/*a590*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_146:
/*a5a0*/ IMAD.MOV.U32 R29, RZ, RZ, R4 ;
/*a5b0*/ MOV R5, 0x1 ;
/*a5c0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a5d0*/ MOV R7, 0xffffffff ;
/*a5e0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*a5f0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_147@srel)) ;
/*a600*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_147@srel)) ;
/*a610*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_147:
/*a620*/ IMAD.MOV.U32 R36, RZ, RZ, R4 ;
/*a630*/ MOV R5, 0x1 ;
/*a640*/ IMAD.MOV.U32 R37, RZ, RZ, R29 ;
/*a650*/ MOV R7, 0xffffffff ;
/*a660*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a670*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_148@srel)) ;
/*a680*/ IMAD.MOV.U32 R4, RZ, RZ, R35 ;
/*a690*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_148@srel)) ;
/*a6a0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_148:
/*a6b0*/ IMAD.MOV.U32 R29, RZ, RZ, R4 ;
/*a6c0*/ MOV R5, 0x1 ;
/*a6d0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a6e0*/ MOV R7, 0xffffffff ;
/*a6f0*/ IMAD.MOV.U32 R4, RZ, RZ, R34 ;
/*a700*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_149@srel)) ;
/*a710*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_149@srel)) ;
/*a720*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_149:
/*a730*/ BRA `(.L_x_767) ;
// .L_x_664: out-of-line section that issues six calls to __cuda_sm70_shflsync_down
// and then branches to .L_x_768.
// Per call, as set up below: R4 = 32-bit word being shuffled, R5 = R29,
// R6 = 0x1f, R7 = 0xffffffff (-0x1), R20/R21 = lo/hi halves of the address of
// the label that follows the CALL. The word read back after each call is in R4.
// NOTE(review): R5/R6/R7 presumably map to shuffle offset / clamp / member mask,
// and R20:R21 to the helper's return address - confirm against the helper.
// Words shuffled and where the results land:
//   R17 -> R39, R16 -> R38, R19 -> R37, R18 -> R36, R35 -> R33, R34 -> R2
.L_x_664:
/*a740*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*a750*/ MOV R6, 0x1f ;
/*a760*/ IMAD.MOV.U32 R5, RZ, RZ, R29 ;
/*a770*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_150@srel)) ;
/*a780*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*a790*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_150@srel)) ;
/*a7a0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_150:
// Save the shuffled copy of R17 in R39, then shuffle R16.
/*a7b0*/ IMAD.MOV.U32 R39, RZ, RZ, R4 ;
/*a7c0*/ MOV R5, R29 ;
/*a7d0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a7e0*/ MOV R7, 0xffffffff ;
/*a7f0*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*a800*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_151@srel)) ;
/*a810*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_151@srel)) ;
/*a820*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_151:
// Save the shuffled copy of R16 in R38, then shuffle R19.
/*a830*/ IMAD.MOV.U32 R38, RZ, RZ, R4 ;
/*a840*/ MOV R5, R29 ;
/*a850*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a860*/ MOV R7, 0xffffffff ;
/*a870*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*a880*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_152@srel)) ;
/*a890*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_152@srel)) ;
/*a8a0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_152:
// Save the shuffled copy of R19 in R37, then shuffle R18.
/*a8b0*/ IMAD.MOV.U32 R37, RZ, RZ, R4 ;
/*a8c0*/ MOV R5, R29 ;
/*a8d0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a8e0*/ MOV R7, 0xffffffff ;
/*a8f0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*a900*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_153@srel)) ;
/*a910*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_153@srel)) ;
/*a920*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_153:
// Save the shuffled copy of R18 in R36, then shuffle R35.
/*a930*/ IMAD.MOV.U32 R36, RZ, RZ, R4 ;
/*a940*/ MOV R5, R29 ;
/*a950*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a960*/ MOV R7, 0xffffffff ;
/*a970*/ IMAD.MOV.U32 R4, RZ, RZ, R35 ;
/*a980*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_154@srel)) ;
/*a990*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_154@srel)) ;
/*a9a0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_154:
// Save the shuffled copy of R35 in R33, then shuffle R34.
/*a9b0*/ IMAD.MOV.U32 R33, RZ, RZ, R4 ;
/*a9c0*/ MOV R5, R29 ;
/*a9d0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*a9e0*/ MOV R7, 0xffffffff ;
/*a9f0*/ IMAD.MOV.U32 R4, RZ, RZ, R34 ;
/*aa00*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_155@srel)) ;
/*aa10*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_155@srel)) ;
/*aa20*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_155:
// Save the shuffled copy of R34 in R2 and continue at .L_x_768.
/*aa30*/ MOV R2, R4 ;
/*aa40*/ BRA `(.L_x_768) ;
// .L_x_672: out-of-line stub that calls __cuda_sm70_barrier_sync_count with
// R4 = 0x7, then branches to .L_x_671.
// R20/R21 receive the lo/hi halves of the address of .L_x_156, the label right
// after the CALL.
// NOTE(review): presumably R4 is the barrier id and R20:R21 the return address;
// no other argument register is written in this stub - confirm against the helper.
.L_x_672:
/*aa50*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*aa60*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_156@srel)) ;
/*aa70*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_156@srel)) ;
/*aa80*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_156:
/*aa90*/ BRA `(.L_x_671) ;
// .L_x_679: out-of-line stub that calls __cuda_sm70_barrier_sync_count with
// R4 = 0x7, then branches to .L_x_678.
// R20/R21 receive the lo/hi halves of the address of .L_x_157, the label right
// after the CALL.
// NOTE(review): presumably R4 is the barrier id and R20:R21 the return address;
// no other argument register is written in this stub - confirm against the helper.
.L_x_679:
/*aaa0*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*aab0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_157@srel)) ;
/*aac0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_157@srel)) ;
/*aad0*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_157:
/*aae0*/ BRA `(.L_x_678) ;
// .L_x_683: out-of-line section that issues six calls to __cuda_sm70_shflsync_down
// and then branches to .L_x_769.
// Per call, as set up below: R4 = 32-bit word being shuffled, R5 = R39,
// R6 = 0x1f, R7 = 0xffffffff (-0x1), R20/R21 = lo/hi halves of the address of
// the label that follows the CALL. The word read back after each call is in R4.
// NOTE(review): R5/R6/R7 presumably map to shuffle offset / clamp / member mask,
// and R20:R21 to the helper's return address - confirm against the helper.
// Words shuffled and where the results land:
//   R19 -> R37, R18 -> R36, R33 -> R35, R32 -> R34, R17 -> R29,
//   R16 -> left in R4 (not copied inside this section)
.L_x_683:
/*aaf0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*ab00*/ MOV R6, 0x1f ;
/*ab10*/ IMAD.MOV.U32 R5, RZ, RZ, R39 ;
/*ab20*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_158@srel)) ;
/*ab30*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*ab40*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_158@srel)) ;
/*ab50*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_158:
// Save the shuffled copy of R19 in R37, then shuffle R18.
/*ab60*/ IMAD.MOV.U32 R37, RZ, RZ, R4 ;
/*ab70*/ MOV R5, R39 ;
/*ab80*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ab90*/ MOV R7, 0xffffffff ;
/*aba0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*abb0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_159@srel)) ;
/*abc0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_159@srel)) ;
/*abd0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_159:
// Save the shuffled copy of R18 in R36, then shuffle R33.
/*abe0*/ IMAD.MOV.U32 R36, RZ, RZ, R4 ;
/*abf0*/ MOV R5, R39 ;
/*ac00*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ac10*/ MOV R7, 0xffffffff ;
/*ac20*/ IMAD.MOV.U32 R4, RZ, RZ, R33 ;
/*ac30*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_160@srel)) ;
/*ac40*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_160@srel)) ;
/*ac50*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_160:
// Save the shuffled copy of R33 in R35, then shuffle R32.
/*ac60*/ IMAD.MOV.U32 R35, RZ, RZ, R4 ;
/*ac70*/ MOV R5, R39 ;
/*ac80*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ac90*/ MOV R7, 0xffffffff ;
/*aca0*/ IMAD.MOV.U32 R4, RZ, RZ, R32 ;
/*acb0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_161@srel)) ;
/*acc0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_161@srel)) ;
/*acd0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_161:
// Save the shuffled copy of R32 in R34, then shuffle R17.
/*ace0*/ IMAD.MOV.U32 R34, RZ, RZ, R4 ;
/*acf0*/ MOV R5, R39 ;
/*ad00*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ad10*/ MOV R7, 0xffffffff ;
/*ad20*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*ad30*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_162@srel)) ;
/*ad40*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_162@srel)) ;
/*ad50*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_162:
// Save the shuffled copy of R17 in R29, then shuffle R16.
/*ad60*/ IMAD.MOV.U32 R29, RZ, RZ, R4 ;
/*ad70*/ MOV R5, R39 ;
/*ad80*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ad90*/ MOV R7, 0xffffffff ;
/*ada0*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*adb0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_163@srel)) ;
/*adc0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_163@srel)) ;
/*add0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_163:
// The shuffled copy of R16 is still in R4 when control continues at .L_x_769.
/*ade0*/ BRA `(.L_x_769) ;
// .L_x_701: out-of-line section that runs a five-step shuffle/add ladder on one
// 64-bit value and then branches to .L_x_770.
// The value enters in the register pair R16:R17 (used as a DADD operand below).
// Each step calls __cuda_sm70_shflsync_down twice, once for the upper word and
// once for the lower word, with R5 = 0x10, 0x8, 0x4, 0x2, 0x1 in turn. After
// each of the first four steps the shuffled pair is rebuilt in R4:R5 and added
// with DADD into the running value in R18:R19.
// Fixed per call: R6 = 0x1f, R7 = 0xffffffff (-0x1), R20/R21 = lo/hi halves of
// the address of the label that follows the CALL. The result is read from R4.
// NOTE(review): the halving R5 sequence looks like a warp-level sum with
// offsets 16/8/4/2/1 - confirm; R6/R7 are presumably clamp and member mask, and
// R20:R21 the helper's return address.
// The DADD for the last step (R5 = 0x1) is not in this section: on exit the
// shuffled upper word is in R17 and the shuffled lower word is in R4.
.L_x_701:
/*adf0*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*ae00*/ MOV R6, 0x1f ;
/*ae10*/ IMAD.MOV.U32 R5, RZ, RZ, 0x10 ;
/*ae20*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_164@srel)) ;
/*ae30*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*ae40*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_164@srel)) ;
/*ae50*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_164:
// Step R5 = 0x10: park the shuffled upper word in R19, then shuffle the lower word R16.
/*ae60*/ IMAD.MOV.U32 R19, RZ, RZ, R4 ;
/*ae70*/ MOV R5, 0x10 ;
/*ae80*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ae90*/ MOV R7, 0xffffffff ;
/*aea0*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*aeb0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_165@srel)) ;
/*aec0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_165@srel)) ;
/*aed0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_165:
// Rebuild the shuffled pair in R4:R5, R18:R19 = R16:R17 + R4:R5, then start
// step R5 = 0x8 by shuffling the upper word of the new sum (R19).
/*aee0*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
/*aef0*/ MOV R7, 0xffffffff ;
/*af00*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*af10*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_166@srel)) ;
/*af20*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_166@srel)) ;
/*af30*/ DADD R18, R16, R4 ;
/*af40*/ MOV R5, 0x8 ;
/*af50*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*af60*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_166:
// Park the shuffled upper word in R17, then shuffle the lower word R18.
/*af70*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*af80*/ MOV R5, 0x8 ;
/*af90*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*afa0*/ MOV R7, 0xffffffff ;
/*afb0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*afc0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_167@srel)) ;
/*afd0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_167@srel)) ;
/*afe0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_167:
// R18:R19 += R4:R5, then start step R5 = 0x4 with the upper word R19.
/*aff0*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*b000*/ MOV R7, 0xffffffff ;
/*b010*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b020*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_168@srel)) ;
/*b030*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_168@srel)) ;
/*b040*/ DADD R18, R18, R4 ;
/*b050*/ MOV R5, 0x4 ;
/*b060*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*b070*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_168:
// Park the shuffled upper word in R17, then shuffle the lower word R18.
/*b080*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*b090*/ MOV R5, 0x4 ;
/*b0a0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b0b0*/ MOV R7, 0xffffffff ;
/*b0c0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*b0d0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_169@srel)) ;
/*b0e0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_169@srel)) ;
/*b0f0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_169:
// R18:R19 += R4:R5, then start step R5 = 0x2 with the upper word R19.
/*b100*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*b110*/ MOV R7, 0xffffffff ;
/*b120*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b130*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_170@srel)) ;
/*b140*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_170@srel)) ;
/*b150*/ DADD R18, R18, R4 ;
/*b160*/ MOV R5, 0x2 ;
/*b170*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*b180*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_170:
// Park the shuffled upper word in R17, then shuffle the lower word R18.
/*b190*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*b1a0*/ MOV R5, 0x2 ;
/*b1b0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b1c0*/ MOV R7, 0xffffffff ;
/*b1d0*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*b1e0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_171@srel)) ;
/*b1f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_171@srel)) ;
/*b200*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_171:
// R18:R19 += R4:R5, then start step R5 = 0x1 with the upper word R19.
/*b210*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*b220*/ MOV R7, 0xffffffff ;
/*b230*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b240*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_172@srel)) ;
/*b250*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_172@srel)) ;
/*b260*/ DADD R18, R18, R4 ;
/*b270*/ MOV R5, 0x1 ;
/*b280*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*b290*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_172:
// Park the shuffled upper word in R17, then shuffle the lower word R18.
/*b2a0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*b2b0*/ MOV R5, 0x1 ;
/*b2c0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b2d0*/ MOV R4, R18 ;
/*b2e0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*b2f0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_173@srel)) ;
/*b300*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_173@srel)) ;
/*b310*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_173:
// Exit with the running value in R18:R19 and the last shuffled pair split
// across R4 (lower word) and R17 (upper word); continue at .L_x_770.
/*b320*/ BRA `(.L_x_770) ;
// .L_x_703: out-of-line section that shuffles the two 32-bit words R17 and R16
// through __cuda_sm70_shflsync_down, then branches to .L_x_771.
// Per call: R4 = word being shuffled, R5 = R19, R6 = 0x1f,
// R7 = 0xffffffff (-0x1), R20/R21 = lo/hi halves of the address of the label
// that follows the CALL. The result is read from R4.
// NOTE(review): R5/R6/R7 presumably map to shuffle offset / clamp / member mask,
// and R20:R21 to the helper's return address - confirm against the helper.
// On exit the shuffled R17 is in R29 and the shuffled R16 is still in R4.
.L_x_703:
/*b330*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*b340*/ MOV R6, 0x1f ;
/*b350*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
/*b360*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_174@srel)) ;
/*b370*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*b380*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_174@srel)) ;
/*b390*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_174:
// Save the shuffled copy of R17 in R29, then shuffle R16.
/*b3a0*/ IMAD.MOV.U32 R29, RZ, RZ, R4 ;
/*b3b0*/ MOV R5, R19 ;
/*b3c0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b3d0*/ MOV R7, 0xffffffff ;
/*b3e0*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*b3f0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_175@srel)) ;
/*b400*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_175@srel)) ;
/*b410*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_175:
/*b420*/ BRA `(.L_x_771) ;
// .L_x_711: out-of-line stub that calls __cuda_sm70_barrier_sync_count with
// R4 = 0x7, then branches to .L_x_710.
// R20/R21 receive the lo/hi halves of the address of .L_x_176, the label right
// after the CALL.
// NOTE(review): presumably R4 is the barrier id and R20:R21 the return address;
// no other argument register is written in this stub - confirm against the helper.
.L_x_711:
/*b430*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*b440*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_176@srel)) ;
/*b450*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_176@srel)) ;
/*b460*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_176:
/*b470*/ BRA `(.L_x_710) ;
// .L_x_716: out-of-line stub that calls __cuda_sm70_barrier_sync_count with
// R4 = 0x7, then branches to .L_x_715.
// R20/R21 receive the lo/hi halves of the address of .L_x_177, the label right
// after the CALL.
// NOTE(review): presumably R4 is the barrier id and R20:R21 the return address;
// no other argument register is written in this stub - confirm against the helper.
.L_x_716:
/*b480*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*b490*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_177@srel)) ;
/*b4a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_177@srel)) ;
/*b4b0*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_177:
/*b4c0*/ BRA `(.L_x_715) ;
// .L_x_719: out-of-line stub that calls __cuda_sm70_barrier_sync_count with
// R4 = 0x7, then branches to .L_x_720.
// R20/R21 receive the lo/hi halves of the address of .L_x_178, the label right
// after the CALL.
// NOTE(review): presumably R4 is the barrier id and R20:R21 the return address;
// no other argument register is written in this stub - confirm against the helper.
.L_x_719:
/*b4d0*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*b4e0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_178@srel)) ;
/*b4f0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_178@srel)) ;
/*b500*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_178:
/*b510*/ BRA `(.L_x_720) ;
// .L_x_723: out-of-line stub that calls __cuda_sm70_barrier_sync_count with
// R4 = 0x7, then branches to .L_x_724.
// R20/R21 receive the lo/hi halves of the address of .L_x_179, the label right
// after the CALL.
// NOTE(review): presumably R4 is the barrier id and R20:R21 the return address;
// no other argument register is written in this stub - confirm against the helper.
.L_x_723:
/*b520*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*b530*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_179@srel)) ;
/*b540*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_179@srel)) ;
/*b550*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_179:
/*b560*/ BRA `(.L_x_724) ;
// .L_x_725: out-of-line section that shuffles the two 32-bit words R17 and R16
// through __cuda_sm70_shflsync_down, then branches to .L_x_772.
// Per call: R4 = word being shuffled, R5 = R29, R6 = 0x1f,
// R7 = 0xffffffff (-0x1), R20/R21 = lo/hi halves of the address of the label
// that follows the CALL. The result is read from R4.
// NOTE(review): R5/R6/R7 presumably map to shuffle offset / clamp / member mask,
// and R20:R21 to the helper's return address - confirm against the helper.
// On exit the shuffled R17 is in R19 and the shuffled R16 is still in R4.
.L_x_725:
/*b570*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*b580*/ MOV R6, 0x1f ;
/*b590*/ IMAD.MOV.U32 R5, RZ, RZ, R29 ;
/*b5a0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_180@srel)) ;
/*b5b0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*b5c0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_180@srel)) ;
/*b5d0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_180:
// Save the shuffled copy of R17 in R19, then shuffle R16.
/*b5e0*/ IMAD.MOV.U32 R19, RZ, RZ, R4 ;
/*b5f0*/ MOV R5, R29 ;
/*b600*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b610*/ MOV R7, 0xffffffff ;
/*b620*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*b630*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_181@srel)) ;
/*b640*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_181@srel)) ;
/*b650*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_181:
/*b660*/ BRA `(.L_x_772) ;
.L_x_739:
// Out-of-line stub containing five rounds of __cuda_sm70_shflsync_down calls with
// R5 = 0x10, 0x8, 0x4, 0x2, 0x1, with a DADD between rounds that accumulates into R18:R19.
// Each round issues two calls, one per 32-bit register of the 64-bit pair: the odd register
// first (R17, later R19), then the even register (R16, later R18).
// NOTE(review): this has the shape of a halving shuffle-and-add tree over a double.
// Presumably it is the warp-level stage of the OpenMP reduction this gist is about -- confirm
// against the main body and the PTX.
// Common arguments for every call (meanings presumed from the helper name):
//   R4 = value, R5 = delta, R6 = 0x1f (presumably clamp/width), R7 = 0xffffffff (presumably member mask).
// R20 and R21 are loaded before each CALL with the address of the label that follows it.
//
// Round 1, delta 0x10, first call: value = R17.
/*b670*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*b680*/ MOV R6, 0x1f ;
/*b690*/ IMAD.MOV.U32 R5, RZ, RZ, 0x10 ;
/*b6a0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_182@srel)) ;
/*b6b0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*b6c0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_182@srel)) ;
/*b6d0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_182:
// Park the first result in R19, then round 1 second call: value = R16.
/*b6e0*/ IMAD.MOV.U32 R19, RZ, RZ, R4 ;
/*b6f0*/ MOV R5, 0x10 ;
/*b700*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b710*/ MOV R7, 0xffffffff ;
/*b720*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*b730*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_183@srel)) ;
/*b740*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_183@srel)) ;
/*b750*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_183:
// Rebuild the shuffled pair as R4:R5 (R4 = second result, R5 = parked first result).
// The R6, R7, R20 and R21 setup for the next call is interleaved ahead of the add.
/*b760*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
/*b770*/ MOV R7, 0xffffffff ;
/*b780*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b790*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_184@srel)) ;
/*b7a0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_184@srel)) ;
// R18:R19 = R16:R17 + R4:R5, the partial sum after the delta-0x10 round.
/*b7b0*/ DADD R18, R16, R4 ;
// Round 2, delta 0x8, first call: value = R19 of the new partial sum.
/*b7c0*/ MOV R5, 0x8 ;
/*b7d0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*b7e0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_184:
// Park the first result in R17, then round 2 second call: value = R18.
/*b7f0*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*b800*/ MOV R5, 0x8 ;
/*b810*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b820*/ MOV R7, 0xffffffff ;
/*b830*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*b840*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_185@srel)) ;
/*b850*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_185@srel)) ;
/*b860*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_185:
// Rebuild the shuffled pair as R4:R5 and accumulate it.
/*b870*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*b880*/ MOV R7, 0xffffffff ;
/*b890*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b8a0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_186@srel)) ;
/*b8b0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_186@srel)) ;
// R18:R19 += R4:R5, the partial sum after the delta-0x8 round.
/*b8c0*/ DADD R18, R18, R4 ;
// Round 3, delta 0x4, first call: value = R19.
/*b8d0*/ MOV R5, 0x4 ;
/*b8e0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*b8f0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_186:
// Park the first result in R17, then round 3 second call: value = R18.
/*b900*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*b910*/ MOV R5, 0x4 ;
/*b920*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b930*/ MOV R7, 0xffffffff ;
/*b940*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*b950*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_187@srel)) ;
/*b960*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_187@srel)) ;
/*b970*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_187:
// Rebuild the shuffled pair as R4:R5 and accumulate it.
/*b980*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*b990*/ MOV R7, 0xffffffff ;
/*b9a0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*b9b0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_188@srel)) ;
/*b9c0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_188@srel)) ;
// R18:R19 += R4:R5, the partial sum after the delta-0x4 round.
/*b9d0*/ DADD R18, R18, R4 ;
// Round 4, delta 0x2, first call: value = R19.
/*b9e0*/ MOV R5, 0x2 ;
/*b9f0*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*ba00*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_188:
// Park the first result in R17, then round 4 second call: value = R18.
/*ba10*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*ba20*/ MOV R5, 0x2 ;
/*ba30*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*ba40*/ MOV R7, 0xffffffff ;
/*ba50*/ IMAD.MOV.U32 R4, RZ, RZ, R18 ;
/*ba60*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_189@srel)) ;
/*ba70*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_189@srel)) ;
/*ba80*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_189:
// Rebuild the shuffled pair as R4:R5 and accumulate it.
/*ba90*/ IMAD.MOV.U32 R5, RZ, RZ, R17 ;
/*baa0*/ MOV R7, 0xffffffff ;
/*bab0*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bac0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_190@srel)) ;
/*bad0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_190@srel)) ;
// R18:R19 += R4:R5, the partial sum after the delta-0x2 round.
/*bae0*/ DADD R18, R18, R4 ;
// Round 5, delta 0x1, first call: value = R19.
/*baf0*/ MOV R5, 0x1 ;
/*bb00*/ IMAD.MOV.U32 R4, RZ, RZ, R19 ;
/*bb10*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_190:
// Park the first result in R17, then round 5 second call: value = R18.
/*bb20*/ IMAD.MOV.U32 R17, RZ, RZ, R4 ;
/*bb30*/ MOV R5, 0x1 ;
/*bb40*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bb50*/ MOV R4, R18 ;
/*bb60*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*bb70*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_191@srel)) ;
/*bb80*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_191@srel)) ;
/*bb90*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_191:
// No DADD for the delta-0x1 round appears in this stub. The results are left in R4 (second
// call) and R17 (first call), and control resumes at .L_x_773, outside this excerpt.
// Presumably the last add is done there -- TODO confirm.
/*bba0*/ BRA `(.L_x_773) ;
.L_x_741:
// Out-of-line stub that calls __cuda_sm70_shflsync_down twice, with R17 and then R16 as the
// value in R4, then branches to .L_x_774 (outside this excerpt).
// R17 and R16 are presumably the two 32-bit halves of one 64-bit value -- TODO confirm.
// Arguments as set up here (meanings presumed from the helper name):
//   R4 = value, R5 = R19 (presumably the lane delta, held in a register in this path),
//   R6 = 0x1f (presumably clamp/width), R7 = 0xffffffff (presumably member mask).
// First call: value = R17.
/*bbb0*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*bbc0*/ MOV R6, 0x1f ;
/*bbd0*/ IMAD.MOV.U32 R5, RZ, RZ, R19 ;
// R20 and R21 = low and high words of the address of .L_x_192, the label after the CALL.
/*bbe0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_192@srel)) ;
/*bbf0*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*bc00*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_192@srel)) ;
/*bc10*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_192:
// The first call's result is read from R4 and parked in R33 so that R4 can be reused.
/*bc20*/ IMAD.MOV.U32 R33, RZ, RZ, R4 ;
// Second call: value = R16, with R5, R6 and R7 reloaded to the same values as before.
/*bc30*/ MOV R5, R19 ;
/*bc40*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bc50*/ MOV R7, 0xffffffff ;
/*bc60*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*bc70*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_193@srel)) ;
/*bc80*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_193@srel)) ;
/*bc90*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_193:
// The second result stays in R4 and the first in R33. Both are consumed at .L_x_774, not visible here.
/*bca0*/ BRA `(.L_x_774) ;
.L_x_744:
// Out-of-line call stub for __cuda_sm70_barrier_sync_count, then branch to .L_x_775
// (outside this excerpt). Unlike the other barrier stubs visible in this part of the
// listing, this one also loads R5 before the call.
// R5 = R2. Presumably the participating-thread count for the barrier -- TODO confirm.
/*bcb0*/ IMAD.MOV.U32 R5, RZ, RZ, R2 ;
// R20 and R21 = low and high words of the address of .L_x_194, the label after the CALL.
/*bcc0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_194@srel)) ;
// R4 = 0x7. Presumably the barrier id -- TODO confirm.
/*bcd0*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
/*bce0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_194@srel)) ;
/*bcf0*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_194:
// Continuation after the helper call: resume at .L_x_775.
/*bd00*/ BRA `(.L_x_775) ;
.L_x_749:
// Out-of-line call stub for __cuda_sm70_barrier_sync_count, then branch to .L_x_748
// (outside this excerpt).
// Only R4 is loaded here (0x7, presumably the barrier id -- TODO confirm). If the helper
// takes further arguments they must already be in place on entry, which cannot be
// verified from this excerpt.
/*bd10*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
// R20 and R21 = low and high words of the address of .L_x_195, the label after the CALL.
/*bd20*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_195@srel)) ;
/*bd30*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_195@srel)) ;
/*bd40*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_195:
// Continuation after the helper call: resume at .L_x_748.
/*bd50*/ BRA `(.L_x_748) ;
.L_x_752:
// Out-of-line call stub for __cuda_sm70_barrier_sync_count, then branch to .L_x_753
// (outside this excerpt).
// Only R4 is loaded here (0x7, presumably the barrier id -- TODO confirm). If the helper
// takes further arguments they must already be in place on entry, which cannot be
// verified from this excerpt.
/*bd60*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
// R20 and R21 = low and high words of the address of .L_x_196, the label after the CALL.
/*bd70*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_196@srel)) ;
/*bd80*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_196@srel)) ;
/*bd90*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_196:
// Continuation after the helper call: resume at .L_x_753.
/*bda0*/ BRA `(.L_x_753) ;
.L_x_756:
// Out-of-line call stub for __cuda_sm70_barrier_sync_count, then branch to .L_x_757
// (outside this excerpt).
// Only R4 is loaded here (0x7, presumably the barrier id -- TODO confirm). If the helper
// takes further arguments they must already be in place on entry, which cannot be
// verified from this excerpt.
/*bdb0*/ IMAD.MOV.U32 R4, RZ, RZ, 0x7 ;
// R20 and R21 = low and high words of the address of .L_x_197, the label after the CALL.
/*bdc0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_197@srel)) ;
/*bdd0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_197@srel)) ;
/*bde0*/ CALL.ABS.NOINC `(__cuda_sm70_barrier_sync_count) ;
.L_x_197:
// Continuation after the helper call: resume at .L_x_757.
/*bdf0*/ BRA `(.L_x_757) ;
.L_x_759:
// Out-of-line stub that calls __cuda_sm70_shflsync_down twice, with R17 and then R16 as the
// value in R4, then branches to .L_x_776 (outside this excerpt).
// R17 and R16 are presumably the two 32-bit halves of one 64-bit value -- TODO confirm.
// Arguments as set up here (meanings presumed from the helper name):
//   R4 = value, R5 = R33 (presumably the lane delta, held in a register in this path),
//   R6 = 0x1f (presumably clamp/width), R7 = 0xffffffff (presumably member mask).
// First call: value = R17.
/*be00*/ IMAD.MOV.U32 R4, RZ, RZ, R17 ;
/*be10*/ MOV R6, 0x1f ;
/*be20*/ IMAD.MOV.U32 R5, RZ, RZ, R33 ;
// R20 and R21 = low and high words of the address of .L_x_198, the label after the CALL.
/*be30*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_198@srel)) ;
/*be40*/ IMAD.MOV.U32 R7, RZ, RZ, -0x1 ;
/*be50*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_198@srel)) ;
/*be60*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_198:
// The first call's result is read from R4 and parked in R19 so that R4 can be reused.
/*be70*/ IMAD.MOV.U32 R19, RZ, RZ, R4 ;
// Second call: value = R16, with R5, R6 and R7 reloaded to the same values as before.
/*be80*/ MOV R5, R33 ;
/*be90*/ IMAD.MOV.U32 R6, RZ, RZ, 0x1f ;
/*bea0*/ MOV R7, 0xffffffff ;
/*beb0*/ IMAD.MOV.U32 R4, RZ, RZ, R16 ;
/*bec0*/ MOV R20, 32@lo((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_199@srel)) ;
/*bed0*/ MOV R21, 32@hi((__omp_offloading_10301_25680e2__ZN11qmcplusplus21MultiDiracDeterminant34mw_evaluateDetsAndGradsForPtclMoveERKNS_19RefVectorWithLeaderIS0_EERKNS1_INS_11ParticleSetEEEiRNS_6MatrixIdNS_12OMPallocatorIdNS_10MallocatorIdLm32EEEEEEE_l783 + .L_x_199@srel)) ;
/*bee0*/ CALL.ABS.NOINC `(__cuda_sm70_shflsync_down) ;
.L_x_199:
// The second result stays in R4 and the first in R19. Both are consumed at .L_x_776, not visible here.
/*bef0*/ BRA `(.L_x_776) ;
.L_x_777:
// Branch-to-self. The instruction just before this label is an unconditional BRA, so
// nothing in this excerpt falls through into it. Presumably this is the guard loop the
// toolchain places after the last real instruction of a kernel -- TODO confirm.
/*bf00*/ BRA `(.L_x_777);
// NOP fill from 0xbf10 through 0xbff0 (the listing addresses advance by 0x10 per
// instruction). Presumably alignment padding for the end of the code section -- TODO confirm.
/*bf10*/ NOP;
/*bf20*/ NOP;
/*bf30*/ NOP;
/*bf40*/ NOP;
/*bf50*/ NOP;
/*bf60*/ NOP;
/*bf70*/ NOP;
/*bf80*/ NOP;
/*bf90*/ NOP;
/*bfa0*/ NOP;
/*bfb0*/ NOP;
/*bfc0*/ NOP;
/*bfd0*/ NOP;
/*bfe0*/ NOP;
/*bff0*/ NOP;
// Closing label with no instructions after it in this listing.
.L_x_974:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment