Skip to content

Instantly share code, notes, and snippets.

@Artem-B
Created October 11, 2021 20:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Artem-B/0e8786afff6e6838b5cf5a9e21851b5c to your computer and use it in GitHub Desktop.
Save Artem-B/0e8786afff6e6838b5cf5a9e21851b5c to your computer and use it in GitHub Desktop.
;*** IR Dump After Combine redundant instructions *** (function: _ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_)
; ModuleID = 'reduced.ll.ll'
source_filename = "<stdin>"
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
; Three signed-byte components; used below as the element type of the
; 27-entry @_ZN7cuneibs16d_cell_to_offsetE table.
%struct.char3 = type { i8, i8, i8 }
; Packed struct (the <{ ... }> form inserts no implicit padding). The datalayout
; string has no "p:" entry, so pointers take the default 64-bit size, which
; gives these byte offsets per field index:
;   0: i32*  @0     1: i16*  @8     2: float4 @16    3: int3 @32    4: i32 @44
;   5: float3 @48   6: [4 x i8] @60 7: i64 @64       8: i32 @72     9: i8 @76
;   10: [3 x i8] @77   11: i32 @80   12: [12 x i8] @84   (96 bytes in total)
; Functions that address this object through raw i8* GEPs use these offsets.
%"class.cuneibs::neiblist_iterator_core" = type <{ i32*, i16*, %struct.float4, %struct.int3, i32, %struct.float3, [4 x i8], i64, i32, i8, [3 x i8], i32, [12 x i8] }>
; Four-float vector; the element indices used by the code below are 0 and 2.
%struct.float4 = type { float, float, float, float }
; Three-int vector.
%struct.int3 = type { i32, i32, i32 }
; Three-float vector.
%struct.float3 = type { float, float, float }
%struct.pos_wrapper = type { %struct.float4* }
%"class.cuneibs::neiblist_iterator.1" = type { %"class.cuneibs::neiblist_iterator_simple.base.3", [8 x i8], %"class.cuneibs::neiblist_iterator_core.base", [12 x i8] }
%"class.cuneibs::neiblist_iterator_simple.base.3" = type { i32 (...)** }
%"class.cuneibs::neiblist_iterator_core.base" = type <{ i32*, i16*, %struct.float4, %struct.int3, i32, %struct.float3, [4 x i8], i64, i32, i8, [3 x i8], i32 }>
%"class.cuneibs::neiblist_iterator_simple.2" = type { i32 (...)**, [8 x i8], %"class.cuneibs::neiblist_iterator_core.base", [12 x i8] }
%struct.forces_params.415 = type { %struct.common_forces_params }
%struct.common_forces_params = type { %struct.stage_common_forces_params.base, i16*, float, float, i32, float }
%struct.stage_common_forces_params.base = type <{ %struct.pos_info_wrapper, %struct.vel_wrapper, %struct.float4*, i32*, i32*, i32, i32, float }>
%struct.pos_info_wrapper = type { %struct.pos_wrapper, %struct.info_wrapper }
%struct.info_wrapper = type { %struct.ushort4* }
%struct.ushort4 = type { i16, i16, i16, i16 }
%struct.vel_wrapper = type { %struct.float4* }
%struct.cell_params = type { %struct.cellStart_wrapper, %struct.cellEnd_wrapper }
%struct.cellStart_wrapper = type { i32* }
%struct.cellEnd_wrapper = type { i32* }
%struct.uint4 = type { i32, i32, i32, i32 }
%struct.jacobi_update_params = type <{ %struct.info_wrapper, %struct.float4*, float*, float*, i32, [4 x i8] }>
%struct.sa_outgoing_bc_params = type { %struct.neibs_list_params.base, %struct.vel_wrapper, %struct.boundelements_wrapper, %struct.vertPos_params, %struct.uint4*, %struct.float4* }
%struct.neibs_list_params.base = type <{ %struct.pos_info_wrapper, i32*, i32*, i16*, i32, float, float }>
%struct.boundelements_wrapper = type { %struct.float4* }
%struct.vertPos_params = type { %struct.float2.0*, %struct.float2.0*, %struct.float2.0* }
%struct.float2.0 = type { float, float }
%struct.reorder_params = type { %struct.reorder_data, %struct.reorder_data.9, %struct.reorder_data.30 }
%struct.reorder_data = type { %struct.float4*, %struct.float4* }
%struct.reorder_data.9 = type { %struct.float4*, %struct.float4* }
%struct.reorder_data.30 = type { %struct.float4*, %struct.float4* }
%"class.thrust::zip_iterator" = type { %"class.thrust::tuple.32" }
%"class.thrust::tuple.32" = type { %"struct.thrust::detail::cons.33" }
%"struct.thrust::detail::cons.33" = type { %"class.thrust::device_ptr", %"struct.thrust::detail::cons.35" }
%"class.thrust::device_ptr" = type { %"class.thrust::pointer" }
%"class.thrust::pointer" = type { %"class.thrust::iterator_adaptor" }
%"class.thrust::iterator_adaptor" = type { i32* }
%"struct.thrust::detail::cons.35" = type { %"class.thrust::device_ptr.36" }
%"class.thrust::device_ptr.36" = type { %"class.thrust::pointer.37" }
%"class.thrust::pointer.37" = type { %"class.thrust::iterator_adaptor.38" }
%"class.thrust::iterator_adaptor.38" = type { %struct.ushort4* }
%"class.thrust::tuple" = type { %"struct.thrust::detail::cons" }
%"struct.thrust::detail::cons" = type { i32, [4 x i8], %"struct.thrust::detail::cons.31" }
%"struct.thrust::detail::cons.31" = type { %struct.ushort4 }
%struct.ptype_hash_compare = type { i8 }
%"struct.thrust::cuda_cub::__transform::unary_transform_f" = type <{ %"class.thrust::device_ptr", i32*, %"struct.thrust::identity", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::identity" = type { i8 }
%"struct.thrust::cuda_cub::__transform::always_true_predicate" = type { i8 }
%"struct.thrust::cuda_cub::__transform::unary_transform_f.69" = type <{ %"class.thrust::device_ptr.36", %struct.ushort4*, %"struct.thrust::identity.70", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::identity.70" = type { i8 }
%"struct.thrust::cuda_cub::__transform::unary_transform_f.72" = type <{ i32*, %"class.thrust::device_ptr", %"struct.thrust::identity", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::cuda_cub::__transform::unary_transform_f.74" = type <{ %struct.ushort4*, %"class.thrust::device_ptr.36", %"struct.thrust::identity.70", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::cuda_cub::__transform::unary_transform_f.79" = type <{ %"class.thrust::device_ptr.36", %"class.thrust::device_ptr.36", %"struct.thrust::identity.70", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::cuda_cub::__transform::unary_transform_f.81" = type <{ %"class.thrust::device_ptr", %"class.thrust::device_ptr", %"struct.thrust::identity", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%struct.buildneibs_params = type { %struct.common_buildneibs_params, %struct.planes_buildneibs_params }
%struct.common_buildneibs_params = type { %struct.pos_info_wrapper, %struct.cell_params, i32*, i16*, i32, float }
%struct.planes_buildneibs_params = type { %struct.int4* }
%struct.int4 = type { i32, i32, i32, i32 }
%struct.euler_params = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.common_euler_params.base = type <{ %struct.Pos_params, %struct.Vel_params, i32*, %struct.ushort4*, %struct.float4*, i32, float, float }>
%struct.Pos_params = type { %struct.float4*, %struct.float4* }
%struct.Vel_params = type { %struct.float4*, %struct.float4* }
%struct.dummy_euler_params = type { %struct.float4* }
%struct.euler_params.111 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.euler_params.114 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.euler_params.117 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%"struct.cub::ReduceByKeyScanTileState" = type { %"struct.cub::ScanTileState" }
%"struct.cub::ScanTileState" = type { i8*, %"struct.cub::KeyValuePair"*, %"struct.cub::KeyValuePair"* }
%"struct.cub::KeyValuePair" = type { i32, [12 x i8], %struct.float4 }
%"class.thrust::device_ptr.120" = type { %"class.thrust::pointer.121" }
%"class.thrust::pointer.121" = type { %"class.thrust::iterator_adaptor.122" }
%"class.thrust::iterator_adaptor.122" = type { i32* }
%"class.thrust::device_ptr.124" = type { %"class.thrust::pointer.125" }
%"class.thrust::pointer.125" = type { %"class.thrust::iterator_adaptor.126" }
%"class.thrust::iterator_adaptor.126" = type { %struct.float4* }
%"struct.thrust::equal_to" = type { i8 }
%"struct.thrust::plus" = type { i8 }
%"struct.thrust::cuda_cub::__scan_by_key::DoNothing" = type { i8 }
%"struct.thrust::cuda_cub::__transform::unary_transform_f.166" = type <{ %struct.float4*, %"class.thrust::device_ptr.124", %"struct.thrust::identity.167", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::identity.167" = type { i8 }
%"struct.thrust::cuda_cub::__transform::unary_transform_f.169" = type <{ %"class.thrust::device_ptr.120", i32*, %"struct.thrust::identity", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::cuda_cub::__transform::unary_transform_f.171" = type <{ %"class.thrust::device_ptr.124", %struct.float4*, %"struct.thrust::identity.167", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%struct.density_diffusion_params = type { %struct.common_density_diffusion_params.base, [4 x i8] }
%struct.common_density_diffusion_params.base = type <{ %struct.float4*, %struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i32*, i16*, i32, float, float, float, float }>
%struct.forces_params = type { %struct.common_forces_params }
%struct.forces_params.223 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.dummy_boundary_forces_params = type { %struct.float4* }
%struct.finalize_forces_params = type { %struct.common_finalize_forces_params, %struct.planes_forces_params, %struct.dyndt_finalize_forces_params }
%struct.common_finalize_forces_params = type { %struct.stage_common_forces_params.base, %struct.float4*, float, %struct.float4*, %struct.float4* }
%struct.planes_forces_params = type { %struct.int4* }
%struct.dyndt_finalize_forces_params = type { float*, float*, float*, i32, i32 }
%struct.forces_params.236 = type { %struct.common_forces_params }
%struct.forces_params.250 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.256 = type { %struct.common_forces_params }
%struct.finalize_forces_params.262 = type { %struct.common_finalize_forces_params.263, %struct.planes_forces_params, %struct.dyndt_finalize_forces_params }
%struct.common_finalize_forces_params.263 = type { %struct.stage_common_forces_params.base, %struct.float4*, %struct.float4*, %struct.float4* }
%struct.neibs_interaction_params = type { %struct.neibs_list_params.base, %struct.vel_wrapper }
%"struct.cupostprocess::testpoints_params" = type { %struct.neibs_interaction_params, %struct.float4* }
%struct.reorder_params.278 = type { %struct.reorder_data, %struct.reorder_data.9, %struct.reorder_data.30 }
%struct.buildneibs_params.279 = type { %struct.common_buildneibs_params }
%struct.euler_params.282 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.euler_params.285 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.euler_params.288 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.euler_params.291 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.forces_params.294 = type { %struct.common_forces_params }
%struct.forces_params.300 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.finalize_forces_params.305 = type { %struct.common_finalize_forces_params, %struct.dyndt_finalize_forces_params }
%struct.forces_params.312 = type { %struct.common_forces_params }
%struct.forces_params.319 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.325 = type { %struct.common_forces_params }
%struct.finalize_forces_params.331 = type { %struct.common_finalize_forces_params.263, %struct.dyndt_finalize_forces_params }
%"struct.cupostprocess::testpoints_params.336" = type { %struct.neibs_interaction_params, %struct.float4* }
%struct.density_diffusion_params.339 = type { %struct.common_density_diffusion_params.base, [4 x i8] }
%struct.forces_params.341 = type { %struct.common_forces_params }
%struct.forces_params.346 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.351 = type { %struct.common_forces_params }
%struct.forces_params.356 = type { %struct.common_forces_params }
%struct.forces_params.361 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.366 = type { %struct.common_forces_params }
%struct.density_diffusion_params.371 = type { %struct.common_density_diffusion_params.base, [4 x i8] }
%struct.forces_params.373 = type { %struct.common_forces_params }
%struct.forces_params.378 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.383 = type { %struct.common_forces_params }
%struct.forces_params.388 = type { %struct.common_forces_params }
%struct.forces_params.393 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.398 = type { %struct.common_forces_params }
%struct.density_diffusion_params.403 = type { %struct.common_density_diffusion_params.base, [4 x i8] }
%struct.forces_params.405 = type { %struct.common_forces_params }
%struct.forces_params.410 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.420 = type { %struct.common_forces_params }
%struct.forces_params.425 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.430 = type { %struct.common_forces_params }
; COMDAT groups ("any" selection kind) for the linkonce_odr functions and the
; vtable object defined further down.
$_ZNK7cuneibs22neiblist_iterator_core10neib_indexEv = comdat any
$_ZNK11pos_wrapper8fetchPosEj = comdat any
$_ZN7cuneibs22neiblist_iterator_core17update_neib_indexEt = comdat any
$_ZN7cuneibs12getNeibIndexERK6float4R6float3PKjtRK4int3RhRj = comdat any
$_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv = comdat any
$_ZNK7cuneibs22neiblist_iterator_core6relPosERK6float4 = comdat any
$_ZN7cuneibs14neib_list_stepIL12ParticleType0EEEmv = comdat any
$_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE = comdat any
; External, externally initialized table of 27 char3 entries in addrspace(4).
; NOTE(review): addrspace(4) is the NVPTX constant address space per the NVPTX
; usage guide - confirm against the LLVM version in use.
@_ZN7cuneibs16d_cell_to_offsetE = external addrspace(4) externally_initialized global [27 x %struct.char3], align 1
; External, externally initialized i64 in addrspace(4). The code below reads it
; through an addrspacecast to a generic pointer and adds it to the i64 iterator
; field at byte offset 64.
@_ZN7cuneibs17d_neiblist_strideE = external local_unnamed_addr addrspace(4) externally_initialized global i64, align 8
; Vtable object with three i8* slots: the integer 16 cast to a pointer, then two
; nulls. Constructors below store the address of array element index 3 (one
; past the last slot) as the object's vtable pointer.
@_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* inttoptr (i64 16 to i8*), i8* null, i8* null] }, comdat, align 8
; Function Attrs: argmemonly nounwind willreturn
; Operands: destination, source, byte count, is-volatile flag.
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
; cuneibs::neiblist_iterator_core::neib_index() const (per the mangled name).
; %0: pointer to the iterator object.
; Returns the address of struct field 11 (an i32). No memory is read or
; written; only the field address is computed.
define linkonce_odr i32* @_ZNK7cuneibs22neiblist_iterator_core10neib_indexEv(%"class.cuneibs::neiblist_iterator_core"* %0) local_unnamed_addr comdat align 2 {
%2 = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 11
ret i32* %2
}
declare i1 @_ZL8isfinitef(float) local_unnamed_addr
; pos_wrapper::fetchPos(unsigned) const (per the mangled name).
; %0: pointer to a pos_wrapper, whose only field is a float4*.
; %1: 32-bit element index; it is zero-extended, so it is treated as unsigned.
; As written, the function computes the address of element %1, passes it as
; the source of a memcpy whose destination and length are both undef, and
; returns an undef float4.
define linkonce_odr %struct.float4 @_ZNK11pos_wrapper8fetchPosEj(%struct.pos_wrapper* %0, i32 %1) local_unnamed_addr comdat align 2 {
; Load the float4 base pointer held in the wrapper.
%3 = getelementptr inbounds %struct.pos_wrapper, %struct.pos_wrapper* %0, i64 0, i32 0
%4 = load %struct.float4*, %struct.float4** %3, align 8
; Address of the %1-th float4.
%5 = zext i32 %1 to i64
%6 = getelementptr inbounds %struct.float4, %struct.float4* %4, i64 %5
%7 = bitcast %struct.float4* %6 to i8*
; Destination pointer and byte count are undef here.
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %7, i64 undef, i1 false)
ret %struct.float4 undef
}
; The mangled name encodes four float parameters ("ffff"), but this declaration
; takes only two.
; NOTE(review): presumably parameters were dropped by test-case reduction - confirm.
declare %struct.float4 @_ZL11make_float4ffff(float, float) local_unnamed_addr
; The mangled name encodes three float parameters ("fff"), but this declaration
; takes only two. Callers pass two floats and read all three result elements.
declare %struct.float3 @_ZL11make_float3fff(float, float) local_unnamed_addr
; Base-object constructor (C2) of cuneibs::neiblist_iterator_core.
; The mangled name encodes five arguments (unsigned, float4 const&, int3 const&,
; unsigned const*, unsigned short const*); this definition keeps only the object
; pointer %0 and an i16* %1.
; Effect: stores %1 into struct field 1 (the i16* member). No other field is
; initialized here.
define linkonce_odr void @_ZN7cuneibs22neiblist_iterator_coreC2EjRK6float4RK4int3PKjPKt(%"class.cuneibs::neiblist_iterator_core"* %0, i16* %1) unnamed_addr align 2 {
%3 = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 1
store i16* %1, i16** %3, align 8
ret void
}
; cuneibs::neiblist_iterator_core::update_neib_index(unsigned short) (per the
; mangled name).
; %0: iterator object.  %1: 16-bit value whose top five bits are decoded below.
; Writes: field 9 (i8), field 5 (float3, all three elements), field 8 (i32).
; Reads:  field 0 (i32*), field 2 (float4 elements 0 and 2), field 3 (int3),
;         and one entry of @_ZN7cuneibs16d_cell_to_offsetE.
define linkonce_odr void @_ZN7cuneibs22neiblist_iterator_core17update_neib_indexEt(%"class.cuneibs::neiblist_iterator_core"* %0, i16 %1) local_unnamed_addr comdat align 2 {
; Field 0: i32* base pointer, dereferenced at the end of the function.
%3 = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 0
%4 = load i32*, i32** %3, align 16
; Addresses of field 9 (i8) and field 8 (i32), both written below.
%5 = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 9
%6 = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 8
; field9 = (%1 >> 11) - 1, i.e. the top five bits of %1 minus one.
%7 = lshr i16 %1, 11
%8 = trunc i16 %7 to i8
%9 = add nsw i8 %8, -1
store i8 %9, i8* %5, align 1
; Mangled operator*(char3 const&, float3 const&), called here with no
; arguments. Only result elements 0 and 2 are used.
%10 = tail call %struct.float3 @_ZmlRK5char3RK6float3()
%11 = extractvalue %struct.float3 %10, 0
%12 = extractvalue %struct.float3 %10, 2
; Elements 0 and 2 of the float4 in field 2, minus the matching elements of
; the call result above.
%.idx.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 2, i32 0
%.idx.val.i = load float, float* %.idx.i, align 4
%13 = getelementptr %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 2, i32 2
%.idx5.val.i = load float, float* %13, align 4
%14 = fsub contract float %.idx.val.i, %11
%15 = fsub contract float %.idx5.val.i, %12
; Build a float3 from the two differences and store all three of its elements
; into field 5.
%16 = tail call %struct.float3 @_ZL11make_float3fff(float %14, float %15)
%oldret.i.i = extractvalue %struct.float3 %16, 0
%oldret1.i.i = extractvalue %struct.float3 %16, 1
%oldret3.i.i = extractvalue %struct.float3 %16, 2
%.sroa.014.0..sroa_idx.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 5, i32 0
store float %oldret.i.i, float* %.sroa.014.0..sroa_idx.i, align 1
%.sroa.215.0..sroa_idx16.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 5, i32 1
store float %oldret1.i.i, float* %.sroa.215.0..sroa_idx16.i, align 1
%.sroa.3.0..sroa_idx17.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 5, i32 2
store float %oldret3.i.i, float* %.sroa.3.0..sroa_idx17.i, align 1
; Re-read field 9 (stored above) and use it, zero-extended, to index the
; 27-entry char3 table; the addrspace(4) address is cast to a generic pointer.
%17 = load i8, i8* %5, align 1
%18 = zext i8 %17 to i64
%19 = getelementptr inbounds [27 x %struct.char3], [27 x %struct.char3] addrspace(4)* @_ZN7cuneibs16d_cell_to_offsetE, i64 0, i64 %18
%20 = addrspacecast %struct.char3 addrspace(4)* %19 to %struct.char3*
; Load the three ints of field 3 and the three bytes of the selected char3.
%.idx8.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 3, i32 0
%.idx8.val.i = load i32, i32* %.idx8.i, align 4
%.idx9.i = getelementptr %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 3, i32 1
%.idx9.val.i = load i32, i32* %.idx9.i, align 4
%.idx10.i = getelementptr %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 3, i32 2
%.idx10.val.i = load i32, i32* %.idx10.i, align 4
%.idx11.i = getelementptr %struct.char3, %struct.char3* %20, i64 0, i32 0
%.idx11.val.i = load i8, i8* %.idx11.i, align 1
%.idx12.i = getelementptr %struct.char3, %struct.char3* %20, i64 0, i32 1
%.idx12.val.i = load i8, i8* %.idx12.i, align 1
%.idx13.i = getelementptr %struct.char3, %struct.char3* %20, i64 0, i32 2
%.idx13.val.i = load i8, i8* %.idx13.i, align 1
; Component-wise sum: field 3 plus the sign-extended char3 bytes.
%21 = sext i8 %.idx11.val.i to i32
%22 = add nsw i32 %.idx8.val.i, %21
%23 = sext i8 %.idx12.val.i to i32
%24 = add nsw i32 %.idx9.val.i, %23
%25 = sext i8 %.idx13.val.i to i32
%26 = add nsw i32 %.idx10.val.i, %25
; The int3 result %27 is not used by any later instruction in this function.
%27 = tail call %struct.int3 @_ZL9make_int3iii(i32 %22, i32 %24, i32 %26)
; Index into the i32 array from field 0 is undef in this IR; the loaded value
; is stored into field 8.
%28 = getelementptr inbounds i32, i32* %4, i64 undef
%29 = load i32, i32* %28, align 4
store i32 %29, i32* %6, align 4
ret void
}
; cuneibs::getNeibIndex(float4 const&, float3&, unsigned const*, unsigned short,
; int3 const&, unsigned char&, unsigned&) (per the mangled name).
; %0: float4 input (elements 0 and 2 are read).
; %1: float3 output (all three elements are written).
; %2: i32 array base.
; %3: 16-bit value whose top five bits are decoded.
; %4: int3 input.
; %5: i8 output, receives (%3 >> 11) - 1.
; %6: i32 output, receives a value loaded from %2.
; Same instruction sequence as update_neib_index, with the object fields
; replaced by explicit pointer arguments. The returned i32 is undef.
define linkonce_odr i32 @_ZN7cuneibs12getNeibIndexERK6float4R6float3PKjtRK4int3RhRj(%struct.float4* %0, %struct.float3* %1, i32* %2, i16 %3, %struct.int3* %4, i8* %5, i32* %6) local_unnamed_addr comdat {
; *%5 = (%3 >> 11) - 1.
%8 = lshr i16 %3, 11
%9 = trunc i16 %8 to i8
%10 = add nsw i8 %9, -1
store i8 %10, i8* %5, align 1
; Mangled operator*(char3 const&, float3 const&), called with no arguments;
; only result elements 0 and 2 are used.
%11 = tail call %struct.float3 @_ZmlRK5char3RK6float3()
%12 = extractvalue %struct.float3 %11, 0
%13 = extractvalue %struct.float3 %11, 2
; Elements 0 and 2 of *%0 minus the matching elements of the call result.
%.idx = getelementptr %struct.float4, %struct.float4* %0, i64 0, i32 0
%.idx.val = load float, float* %.idx, align 4
%14 = getelementptr %struct.float4, %struct.float4* %0, i64 0, i32 2
%.idx5.val = load float, float* %14, align 4
%15 = fsub contract float %.idx.val, %12
%16 = fsub contract float %.idx5.val, %13
; Build a float3 from the two differences and write it, element by element,
; to *%1.
%17 = tail call %struct.float3 @_ZL11make_float3fff(float %15, float %16)
%oldret.i = extractvalue %struct.float3 %17, 0
%oldret1.i = extractvalue %struct.float3 %17, 1
%oldret3.i = extractvalue %struct.float3 %17, 2
%.sroa.014.0..sroa_idx = getelementptr inbounds %struct.float3, %struct.float3* %1, i64 0, i32 0
store float %oldret.i, float* %.sroa.014.0..sroa_idx, align 1
%.sroa.215.0..sroa_idx16 = getelementptr inbounds %struct.float3, %struct.float3* %1, i64 0, i32 1
store float %oldret1.i, float* %.sroa.215.0..sroa_idx16, align 1
%.sroa.3.0..sroa_idx17 = getelementptr inbounds %struct.float3, %struct.float3* %1, i64 0, i32 2
store float %oldret3.i, float* %.sroa.3.0..sroa_idx17, align 1
; Re-read *%5 and use it, zero-extended, to select a char3 from the 27-entry
; table; the addrspace(4) address is cast to a generic pointer.
%18 = load i8, i8* %5, align 1
%19 = zext i8 %18 to i64
%20 = getelementptr inbounds [27 x %struct.char3], [27 x %struct.char3] addrspace(4)* @_ZN7cuneibs16d_cell_to_offsetE, i64 0, i64 %19
%21 = addrspacecast %struct.char3 addrspace(4)* %20 to %struct.char3*
; Load the three ints of *%4 and the three bytes of the selected char3.
%.idx8 = getelementptr %struct.int3, %struct.int3* %4, i64 0, i32 0
%.idx8.val = load i32, i32* %.idx8, align 4
%.idx9 = getelementptr %struct.int3, %struct.int3* %4, i64 0, i32 1
%.idx9.val = load i32, i32* %.idx9, align 4
%.idx10 = getelementptr %struct.int3, %struct.int3* %4, i64 0, i32 2
%.idx10.val = load i32, i32* %.idx10, align 4
%.idx11 = getelementptr %struct.char3, %struct.char3* %21, i64 0, i32 0
%.idx11.val = load i8, i8* %.idx11, align 1
%.idx12 = getelementptr %struct.char3, %struct.char3* %21, i64 0, i32 1
%.idx12.val = load i8, i8* %.idx12, align 1
%.idx13 = getelementptr %struct.char3, %struct.char3* %21, i64 0, i32 2
%.idx13.val = load i8, i8* %.idx13, align 1
; Component-wise sum: *%4 plus the sign-extended char3 bytes.
%22 = sext i8 %.idx11.val to i32
%23 = add nsw i32 %.idx8.val, %22
%24 = sext i8 %.idx12.val to i32
%25 = add nsw i32 %.idx9.val, %24
%26 = sext i8 %.idx13.val to i32
%27 = add nsw i32 %.idx10.val, %26
; The int3 result %28 is not used by any later instruction in this function.
%28 = tail call %struct.int3 @_ZL9make_int3iii(i32 %23, i32 %25, i32 %27)
; Index into %2 is undef in this IR; the loaded value is written to *%6.
%29 = getelementptr inbounds i32, i32* %2, i64 undef
%30 = load i32, i32* %29, align 4
store i32 %30, i32* %6, align 4
ret i32 undef
}
; The mangled name is operator*(char3 const&, float3 const&), but the
; declaration takes no parameters and every call site passes none.
; NOTE(review): presumably the arguments were removed by test-case reduction - confirm.
declare %struct.float3 @_ZmlRK5char3RK6float3() local_unnamed_addr
; make_int3(int, int, int): the declared signature matches the "iii" in the
; mangled name.
declare %struct.int3 @_ZL9make_int3iii(i32, i32, i32) local_unnamed_addr
; Complete-object constructor (C1) of
; cuneibs::neiblist_iterator<ParticleType 0>.
; The mangled name encodes five arguments; this definition keeps only the
; object pointer %0 and an i16* %1.
; Effects: stores %1 into the i16* member of the embedded core sub-object
; (outer field 2, inner field 1) and installs the vtable pointer in outer
; field 0.
define linkonce_odr void @_ZN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEC1EjRK6float4RK4int3PKjPKt(%"class.cuneibs::neiblist_iterator.1"* %0, i16* %1) unnamed_addr align 2 {
%3 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %0, i64 0, i32 2, i32 1
store i16* %1, i16** %3, align 8
%4 = getelementptr %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %0, i64 0, i32 0, i32 0
; The stored address is element index 3 of the 3-slot vtable array, i.e. the
; position just past its last slot.
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)*** %4, align 8, !tbaa !125
ret void
}
; cuneibs::neiblist_iterator_simple<ParticleType 0>::next() (per the mangled
; name).
; %0: the iterator object. Its state is reached through a sub-object whose byte
; displacement from %0 is read from the vtable (the i64 stored 24 bytes before
; the address the vtable pointer holds).
; NOTE(review): this looks like the Itanium ABI virtual-base offset lookup - confirm.
; The byte offsets used on that sub-object (0, 8, 16, 24, 32-40, 44, 48-56, 64,
; 72, 76) coincide with the packed field offsets of
; %"class.cuneibs::neiblist_iterator_core.base".
; Returns false if the i16 fetched after advancing equals -1 (all bits set);
; otherwise it refreshes the derived state as update_neib_index does and
; returns true.
define linkonce_odr i1 @_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv(%"class.cuneibs::neiblist_iterator_simple.2"* %0) local_unnamed_addr comdat align 2 {
; Step size, read from the addrspace(4) global through a generic pointer.
%2 = load i64, i64* addrspacecast (i64 addrspace(4)* @_ZN7cuneibs17d_neiblist_strideE to i64*), align 8, !tbaa !128
; Load the vtable pointer, then the i64 displacement stored at vptr - 24.
%3 = bitcast %"class.cuneibs::neiblist_iterator_simple.2"* %0 to i8**
%4 = load i8*, i8** %3, align 8, !tbaa !125
%5 = getelementptr i8, i8* %4, i64 -24
%6 = bitcast i8* %5 to i64*
%7 = load i64, i64* %6, align 8
; sub-object = (i8*)%0 + displacement; advance its i64 at +64 by the step.
%8 = bitcast %"class.cuneibs::neiblist_iterator_simple.2"* %0 to i8*
%9 = getelementptr inbounds i8, i8* %8, i64 %7
%10 = getelementptr inbounds i8, i8* %9, i64 64
%11 = bitcast i8* %10 to i64*
%12 = load i64, i64* %11, align 16
%13 = add i64 %12, %2
store i64 %13, i64* %11, align 16, !tbaa !131
; The displacement is loaded a second time and the sub-object address
; recomputed as %15; every access below is relative to %15.
%14 = load i64, i64* %6, align 8
%15 = getelementptr inbounds i8, i8* %8, i64 %14
; +8: i16* array base.
%16 = getelementptr inbounds i8, i8* %15, i64 8
%17 = bitcast i8* %16 to i16**
%18 = load i16*, i16** %17, align 8
; +64: the i64 that was just advanced.
%19 = getelementptr inbounds i8, i8* %15, i64 64
%20 = bitcast i8* %19 to i64*
%21 = load i64, i64* %20, align 16
; +44: i32, zero-extended and added to the i64 to form the element index.
%22 = getelementptr inbounds i8, i8* %15, i64 44
%23 = bitcast i8* %22 to i32*
%24 = load i32, i32* %23, align 4
%25 = zext i32 %24 to i64
%26 = add i64 %21, %25
; Fetch the i16 entry; the value -1 takes the return-false path.
%27 = getelementptr inbounds i16, i16* %18, i64 %26
%28 = load i16, i16* %27, align 2
%29 = icmp eq i16 %28, -1
br i1 %29, label %67, label %30
30: ; preds = %1
; +0: i32* array base, dereferenced at the end of this block.
%31 = bitcast i8* %15 to i32**
%32 = load i32*, i32** %31, align 16
; +76: i8 destination; +72: i32 destination.
%33 = getelementptr inbounds i8, i8* %15, i64 76
%34 = getelementptr inbounds i8, i8* %15, i64 72
%35 = bitcast i8* %34 to i32*
; byte at +76 = (entry >> 11) - 1, i.e. the top five bits of the entry minus one.
%36 = lshr i16 %28, 11
%37 = trunc i16 %36 to i8
%38 = add nsw i8 %37, -1
store i8 %38, i8* %33, align 1
; Mangled operator*(char3 const&, float3 const&), called with no arguments;
; only result elements 0 and 2 are used.
%39 = tail call %struct.float3 @_ZmlRK5char3RK6float3()
%40 = extractvalue %struct.float3 %39, 0
%41 = extractvalue %struct.float3 %39, 2
; Floats at +16 and +24 (elements 0 and 2 of the float4 at +16), minus the
; matching elements of the call result.
%.idx.i.i = getelementptr inbounds i8, i8* %15, i64 16
%42 = bitcast i8* %.idx.i.i to float*
%.idx.val.i.i = load float, float* %42, align 4
%43 = getelementptr i8, i8* %15, i64 24
%44 = bitcast i8* %43 to float*
%.idx5.val.i.i = load float, float* %44, align 4
%45 = fsub contract float %.idx.val.i.i, %40
%46 = fsub contract float %.idx5.val.i.i, %41
; Build a float3 from the two differences and store its three elements at
; +48, +52 and +56.
%47 = tail call %struct.float3 @_ZL11make_float3fff(float %45, float %46)
%oldret.i.i.i = extractvalue %struct.float3 %47, 0
%oldret1.i.i.i = extractvalue %struct.float3 %47, 1
%oldret3.i.i.i = extractvalue %struct.float3 %47, 2
%.sroa.014.0..sroa_idx.i.i = getelementptr inbounds i8, i8* %15, i64 48
%48 = bitcast i8* %.sroa.014.0..sroa_idx.i.i to float*
store float %oldret.i.i.i, float* %48, align 1
%.sroa.215.0..sroa_idx16.i.i = getelementptr inbounds i8, i8* %15, i64 52
%49 = bitcast i8* %.sroa.215.0..sroa_idx16.i.i to float*
store float %oldret1.i.i.i, float* %49, align 1
%.sroa.3.0..sroa_idx17.i.i = getelementptr inbounds i8, i8* %15, i64 56
%50 = bitcast i8* %.sroa.3.0..sroa_idx17.i.i to float*
store float %oldret3.i.i.i, float* %50, align 1
; Re-read the byte at +76 and use it, zero-extended, to select a char3 from the
; 27-entry table; the addrspace(4) address is cast to a generic pointer.
%51 = load i8, i8* %33, align 1
%52 = zext i8 %51 to i64
%53 = getelementptr inbounds [27 x %struct.char3], [27 x %struct.char3] addrspace(4)* @_ZN7cuneibs16d_cell_to_offsetE, i64 0, i64 %52
%54 = addrspacecast %struct.char3 addrspace(4)* %53 to %struct.char3*
; Ints at +32, +36 and +40 (the int3), then the three bytes of the char3.
%.idx8.i.i = getelementptr inbounds i8, i8* %15, i64 32
%55 = bitcast i8* %.idx8.i.i to i32*
%.idx8.val.i.i = load i32, i32* %55, align 4
%.idx9.i.i = getelementptr i8, i8* %15, i64 36
%56 = bitcast i8* %.idx9.i.i to i32*
%.idx9.val.i.i = load i32, i32* %56, align 4
%.idx10.i.i = getelementptr i8, i8* %15, i64 40
%57 = bitcast i8* %.idx10.i.i to i32*
%.idx10.val.i.i = load i32, i32* %57, align 4
%.idx11.i.i = getelementptr %struct.char3, %struct.char3* %54, i64 0, i32 0
%.idx11.val.i.i = load i8, i8* %.idx11.i.i, align 1
%.idx12.i.i = getelementptr %struct.char3, %struct.char3* %54, i64 0, i32 1
%.idx12.val.i.i = load i8, i8* %.idx12.i.i, align 1
%.idx13.i.i = getelementptr %struct.char3, %struct.char3* %54, i64 0, i32 2
%.idx13.val.i.i = load i8, i8* %.idx13.i.i, align 1
; Component-wise sum: the int3 plus the sign-extended char3 bytes.
%58 = sext i8 %.idx11.val.i.i to i32
%59 = add nsw i32 %.idx8.val.i.i, %58
%60 = sext i8 %.idx12.val.i.i to i32
%61 = add nsw i32 %.idx9.val.i.i, %60
%62 = sext i8 %.idx13.val.i.i to i32
%63 = add nsw i32 %.idx10.val.i.i, %62
; The int3 result %64 is not used by any later instruction in this function.
%64 = tail call %struct.int3 @_ZL9make_int3iii(i32 %59, i32 %61, i32 %63)
; Index into the i32 array is undef in this IR; the loaded value goes to +72.
%65 = getelementptr inbounds i32, i32* %32, i64 undef
%66 = load i32, i32* %65, align 4
store i32 %66, i32* %35, align 4
br label %67
67: ; preds = %1, %30
; true when arriving from block %30, false when arriving from the entry block.
%.0 = phi i1 [ true, %30 ], [ false, %1 ]
ret i1 %.0
}
; cuneibs::neiblist_iterator_core::relPos(float4 const&) const (per the mangled
; name).
; %0: iterator object (element 2 of the float3 in field 5 is read).
; %1: float4 argument (element 2 is read).
; As written, only the element-2 difference is computed. It is passed to
; make_float4 together with an undef float, the call result is discarded, and
; the function returns an undef float4.
define linkonce_odr %struct.float4 @_ZNK7cuneibs22neiblist_iterator_core6relPosERK6float4(%"class.cuneibs::neiblist_iterator_core"* %0, %struct.float4* %1) local_unnamed_addr comdat align 2 {
%.idx = getelementptr %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 5, i32 2
%.idx.val = load float, float* %.idx, align 4
%.idx1 = getelementptr %struct.float4, %struct.float4* %1, i64 0, i32 2
%.idx1.val = load float, float* %.idx1, align 8
; field5[2] - arg[2]
%3 = fsub contract float %.idx.val, %.idx1.val
; %4 is never used after this call.
%4 = tail call %struct.float4 @_ZL11make_float4ffff(float %3, float undef)
ret %struct.float4 undef
}
; cuneibs::neib_list_step<ParticleType 0>() (per the mangled name).
; Takes no arguments and returns the current i64 value of
; @_ZN7cuneibs17d_neiblist_strideE, read through an addrspacecast from
; addrspace(4) to a generic pointer.
define linkonce_odr i64 @_ZN7cuneibs14neib_list_stepIL12ParticleType0EEEmv() local_unnamed_addr comdat {
%1 = load i64, i64* addrspacecast (i64 addrspace(4)* @_ZN7cuneibs17d_neiblist_strideE to i64*), align 8, !tbaa !128
ret i64 %1
}
define void @_ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_(%struct.forces_params.415* nocapture readonly %0) local_unnamed_addr {
%2 = alloca %"class.cuneibs::neiblist_iterator.1", align 16
%3 = getelementptr inbounds %struct.forces_params.415, %struct.forces_params.415* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
%4 = bitcast %struct.forces_params.415* %0 to i8**
%5 = load i8*, i8** %4, align 8
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %5, i64 undef, i1 false)
%6 = getelementptr inbounds %struct.forces_params.415, %struct.forces_params.415* %0, i64 0, i32 0, i32 1
%7 = bitcast i16** %6 to i64*
%8 = load i64, i64* %7, align 8
%9 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 1
%10 = bitcast i16** %9 to i64*
store i64 %8, i64* %10, align 8
%11 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 0, i32 0
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)*** %11, align 16, !tbaa !125
%12 = inttoptr i64 %8 to i16*
%13 = getelementptr inbounds i16, i16* %12, i64 undef
%14 = load i16, i16* %13, align 2
%15 = icmp eq i16 %14, -1
br i1 %15, label %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit, label %.lr.ph
.lr.ph: ; preds = %1
br label %16
_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.loopexit: ; preds = %16
br label %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit
_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit: ; preds = %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.loopexit, %1
ret void
16: ; preds = %.lr.ph, %16
%17 = phi i16 [ %14, %.lr.ph ], [ %65, %16 ]
%18 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 0
%19 = load i32*, i32** %18, align 16
%20 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 9
%21 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 8
%22 = lshr i16 %17, 11
%23 = trunc i16 %22 to i8
%24 = add nsw i8 %23, -1
store i8 %24, i8* %20, align 4
%25 = tail call %struct.float3 @_ZmlRK5char3RK6float3()
%26 = extractvalue %struct.float3 %25, 0
%27 = extractvalue %struct.float3 %25, 2
%28 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 2, i32 0
%.idx.val.i.i.i = load float, float* %28, align 16
%29 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 2, i32 2
%.idx5.val.i.i.i = load float, float* %29, align 8
%30 = fsub contract float %.idx.val.i.i.i, %26
%31 = fsub contract float %.idx5.val.i.i.i, %27
%32 = tail call %struct.float3 @_ZL11make_float3fff(float %30, float %31)
%oldret.i.i.i.i = extractvalue %struct.float3 %32, 0
%oldret1.i.i.i.i = extractvalue %struct.float3 %32, 1
%oldret3.i.i.i.i = extractvalue %struct.float3 %32, 2
%33 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 5, i32 0
store float %oldret.i.i.i.i, float* %33, align 16
%.sroa.215.0..sroa_idx16.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 5, i32 1
store float %oldret1.i.i.i.i, float* %.sroa.215.0..sroa_idx16.i.i.i, align 4
%.sroa.3.0..sroa_idx17.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 5, i32 2
store float %oldret3.i.i.i.i, float* %.sroa.3.0..sroa_idx17.i.i.i, align 8
%34 = zext i8 %24 to i64
%35 = getelementptr inbounds [27 x %struct.char3], [27 x %struct.char3] addrspace(4)* @_ZN7cuneibs16d_cell_to_offsetE, i64 0, i64 %34
%36 = addrspacecast %struct.char3 addrspace(4)* %35 to %struct.char3*
%37 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 3, i32 0
%.idx8.val.i.i.i = load i32, i32* %37, align 16
%.idx9.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 3, i32 1
%.idx9.val.i.i.i = load i32, i32* %.idx9.i.i.i, align 4
%.idx10.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 3, i32 2
%.idx10.val.i.i.i = load i32, i32* %.idx10.i.i.i, align 8
%.idx11.i.i.i = getelementptr %struct.char3, %struct.char3* %36, i64 0, i32 0
%.idx11.val.i.i.i = load i8, i8* %.idx11.i.i.i, align 1
%.idx12.i.i.i = getelementptr %struct.char3, %struct.char3* %36, i64 0, i32 1
%.idx12.val.i.i.i = load i8, i8* %.idx12.i.i.i, align 1
%.idx13.i.i.i = getelementptr %struct.char3, %struct.char3* %36, i64 0, i32 2
%.idx13.val.i.i.i = load i8, i8* %.idx13.i.i.i, align 1
%38 = sext i8 %.idx11.val.i.i.i to i32
%39 = add nsw i32 %.idx8.val.i.i.i, %38
%40 = sext i8 %.idx12.val.i.i.i to i32
%41 = add nsw i32 %.idx9.val.i.i.i, %40
%42 = sext i8 %.idx13.val.i.i.i to i32
%43 = add nsw i32 %.idx10.val.i.i.i, %42
%44 = tail call %struct.int3 @_ZL9make_int3iii(i32 %39, i32 %41, i32 %43)
%45 = getelementptr inbounds i32, i32* %19, i64 undef
%46 = load i32, i32* %45, align 4
store i32 %46, i32* %21, align 8
%47 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 11
%48 = load i32, i32* %47, align 16
%49 = load %struct.float4*, %struct.float4** %3, align 8
%50 = zext i32 %48 to i64
%51 = getelementptr inbounds %struct.float4, %struct.float4* %49, i64 %50
%52 = bitcast %struct.float4* %51 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %52, i64 undef, i1 false)
%.idx.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 5, i32 2
%.idx.val.i = load float, float* %.idx.i, align 8
%.idx1.val.i = load float, float* inttoptr (i64 8 to float*), align 8
%53 = fsub contract float %.idx.val.i, %.idx1.val.i
%54 = tail call %struct.float4 @_ZL11make_float4ffff(float %53, float undef)
%55 = tail call i1 @_ZL8isfinitef(float undef)
%56 = load i64, i64* addrspacecast (i64 addrspace(4)* @_ZN7cuneibs17d_neiblist_strideE to i64*), align 8, !tbaa !128
%57 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 7
%58 = load i64, i64* %57, align 16
%59 = add i64 %58, %56
store i64 %59, i64* %57, align 16, !tbaa !131
%60 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 4
%61 = load i32, i32* %60, align 4
%62 = zext i32 %61 to i64
%63 = add i64 %59, %62
%64 = getelementptr inbounds i16, i16* %12, i64 %63
%65 = load i16, i16* %64, align 2
%66 = icmp eq i16 %65, -1
br i1 %66, label %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.loopexit, label %16
}
; Attribute group #0: argmemonly + nounwind + willreturn.
; NOTE(review): the declaration that references #0 is not visible in this excerpt; presumably it
; is the @llvm.memcpy.p0i8.p0i8.i64 intrinsic called in the function above -- confirm.
attributes #0 = { argmemonly nounwind willreturn }
; Named metadata listing the NVVM annotation nodes (!0 .. !124) defined below. Each referenced node
; is a {function, key string, i32 value} tuple. Several nodes are listed more than once
; (e.g. !57-!60, !79-!81, !84, !85, !92-!95, !104, !111, !118); the list is kept exactly as dumped.
!nvvm.annotations = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47, !48, !49, !50, !51, !52, !53, !54, !55, !56, !57, !57, !58, !58, !59, !59, !60, !60, !61, !62, !63, !64, !65, !66, !67, !68, !69, !70, !71, !72, !73, !74, !75, !76, !77, !78, !79, !80, !81, !79, !80, !81, !82, !83, !84, !84, !85, !85, !86, !87, !88, !89, !90, !91, !92, !92, !93, !93, !94, !94, !95, !95, !71, !96, !97, !98, !99, !100, !101, !102, !103, !84, !84, !85, !85, !104, !105, !106, !107, !104, !108, !109, !110, !111, !112, !113, !114, !111, !115, !116, !117, !118, !119, !120, !121, !118, !122, !123, !124}
; NVVM annotation nodes !0 .. !56. Layout of every node: !{<function pointer>, !"<key>", i32 <value>}.
; Keys that occur here (meanings per the NVVM IR annotation convention):
;   "kernel"   = 1        -> the function is a kernel entry point
;   "maxntidx" = 128/256  -> maximum number of threads per block in the x dimension
;   "minctasm" = 5/6      -> minimum number of CTAs per SM
; The function operand of every node in this group is `undef`.
; NOTE(review): presumably the original kernels were stripped when the module was reduced
; (ModuleID is 'reduced.ll.ll') -- confirm.
!0 = !{void (i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"kernel", i32 1}
!1 = !{void (i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"maxntidx", i32 256}
!2 = !{void (i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"minctasm", i32 6}
!3 = !{void (%struct.cell_params*, i32)* undef, !"kernel", i32 1}
!4 = !{void (%struct.float4*, %struct.ushort4*, i32*, %struct.uint4*, i32*, i16*, i32, float, float)* undef, !"kernel", i32 1}
!5 = !{void (%struct.ushort4*, %struct.float4*, %struct.uint4*, i32)* undef, !"kernel", i32 1}
!6 = !{void (%struct.ushort4*, %struct.float4*, %struct.float4*, i32, i32, float)* undef, !"kernel", i32 1}
!7 = !{void (%struct.float4*, %struct.ushort4*, i32)* undef, !"kernel", i32 1}
!8 = !{void (%struct.pos_info_wrapper*, float*, i32, float)* undef, !"kernel", i32 1}
!9 = !{void (%struct.pos_info_wrapper*, float*, i32, float)* undef, !"maxntidx", i32 128}
!10 = !{void (%struct.pos_info_wrapper*, float*, i32, float)* undef, !"minctasm", i32 6}
!11 = !{void (%struct.jacobi_update_params*)* undef, !"kernel", i32 1}
!12 = !{void (%struct.jacobi_update_params*)* undef, !"maxntidx", i32 128}
!13 = !{void (%struct.jacobi_update_params*)* undef, !"minctasm", i32 6}
!14 = !{void (%struct.ushort4*, %struct.float4*, %struct.float4*, float*, i32)* undef, !"kernel", i32 1}
!15 = !{void (%struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i32, i32, %struct.float4*)* undef, !"kernel", i32 1}
!16 = !{void (%struct.float4*, i32, i32)* undef, !"kernel", i32 1}
!17 = !{void ()* undef, !"kernel", i32 1}
!18 = !{void (float*, %struct.float4*, i32)* undef, !"kernel", i32 1}
!19 = !{void (%struct.float4*, %struct.float4*, %struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i16*, i32*, i32, float, float)* undef, !"kernel", i32 1}
!20 = !{void (%struct.float4*, %struct.float4*, %struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i16*, i32*, i32, float, float)* undef, !"maxntidx", i32 128}
!21 = !{void (%struct.float4*, %struct.float4*, %struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i16*, i32*, i32, float, float)* undef, !"minctasm", i32 6}
!22 = !{void (%struct.sa_outgoing_bc_params*)* undef, !"kernel", i32 1}
!23 = !{void (%struct.float4*, %struct.uint4*, %struct.ushort4*, i32*, i32*, i16*, i32)* undef, !"kernel", i32 1}
!24 = !{void (%struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32)* undef, !"kernel", i32 1}
!25 = !{void (%struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32)* undef, !"maxntidx", i32 128}
!26 = !{void (%struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32)* undef, !"minctasm", i32 6}
!27 = !{void (%struct.float4*, %struct.float4*, %struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32, float)* undef, !"kernel", i32 1}
!28 = !{void (%struct.float4*, %struct.float4*, %struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32, float)* undef, !"maxntidx", i32 128}
!29 = !{void (%struct.float4*, %struct.float4*, %struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32, float)* undef, !"minctasm", i32 6}
!30 = !{void (%struct.float4*, i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"kernel", i32 1}
!31 = !{void (%struct.float4*, i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"maxntidx", i32 256}
!32 = !{void (%struct.float4*, i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"minctasm", i32 6}
!33 = !{void (%struct.reorder_params*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"kernel", i32 1}
!34 = !{void (%struct.reorder_params*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"maxntidx", i32 256}
!35 = !{void (%struct.reorder_params*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"minctasm", i32 6}
!36 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::device_ptr"*, i64, %"class.thrust::tuple"*, i32*, %struct.ptype_hash_compare*)* undef, !"kernel", i32 1}
!37 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::device_ptr"*, i64, %"class.thrust::tuple"*, i32*, %struct.ptype_hash_compare*)* undef, !"maxntidx", i32 256}
!38 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f"*, i64)* undef, !"kernel", i32 1}
!39 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f"*, i64)* undef, !"maxntidx", i32 256}
!40 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.69"*, i64)* undef, !"kernel", i32 1}
!41 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.69"*, i64)* undef, !"maxntidx", i32 256}
!42 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.72"*, i64)* undef, !"kernel", i32 1}
!43 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.72"*, i64)* undef, !"maxntidx", i32 256}
!44 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.74"*, i64)* undef, !"kernel", i32 1}
!45 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.74"*, i64)* undef, !"maxntidx", i32 256}
!46 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::tuple"*, i64, i64, i64*, %struct.ptype_hash_compare*, i64, i32)* undef, !"kernel", i32 1}
!47 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::tuple"*, i64, i64, i64*, %struct.ptype_hash_compare*, i64, i32)* undef, !"maxntidx", i32 256}
!48 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::device_ptr"*, i64, %"class.thrust::tuple"*, i32*, %struct.ptype_hash_compare*, i64*, i64)* undef, !"kernel", i32 1}
!49 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::device_ptr"*, i64, %"class.thrust::tuple"*, i32*, %struct.ptype_hash_compare*, i64*, i64)* undef, !"maxntidx", i32 256}
!50 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.79"*, i64)* undef, !"kernel", i32 1}
!51 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.79"*, i64)* undef, !"maxntidx", i32 256}
!52 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.81"*, i64)* undef, !"kernel", i32 1}
!53 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.81"*, i64)* undef, !"maxntidx", i32 256}
!54 = !{void (%struct.buildneibs_params*)* undef, !"kernel", i32 1}
!55 = !{void (%struct.buildneibs_params*)* undef, !"maxntidx", i32 256}
!56 = !{void (%struct.buildneibs_params*)* undef, !"minctasm", i32 5}
; NVVM annotation nodes !57 .. !120. Same layout as the other annotation nodes:
; !{<function pointer>, !"kernel" | !"maxntidx" | !"minctasm", i32 <value>}.
; Every function operand in this group is `undef`; only the function *type* of each annotated
; kernel is still recorded.
!57 = !{void (%struct.euler_params*)* undef, !"kernel", i32 1}
!58 = !{void (%struct.euler_params.111*)* undef, !"kernel", i32 1}
!59 = !{void (%struct.euler_params.114*)* undef, !"kernel", i32 1}
!60 = !{void (%struct.euler_params.117*)* undef, !"kernel", i32 1}
!61 = !{void (%"struct.cub::ReduceByKeyScanTileState"*, i64)* undef, !"kernel", i32 1}
!62 = !{void (%"struct.cub::ReduceByKeyScanTileState"*, i64)* undef, !"maxntidx", i32 128}
!63 = !{void (%"class.thrust::device_ptr.120"*, %"class.thrust::device_ptr.124"*, %"class.thrust::device_ptr.124"*, %"struct.thrust::equal_to"*, %"struct.thrust::plus"*, %"struct.cub::ReduceByKeyScanTileState"*, i32, %"struct.thrust::cuda_cub::__scan_by_key::DoNothing"*)* undef, !"kernel", i32 1}
!64 = !{void (%"class.thrust::device_ptr.120"*, %"class.thrust::device_ptr.124"*, %"class.thrust::device_ptr.124"*, %"struct.thrust::equal_to"*, %"struct.thrust::plus"*, %"struct.cub::ReduceByKeyScanTileState"*, i32, %"struct.thrust::cuda_cub::__scan_by_key::DoNothing"*)* undef, !"maxntidx", i32 256}
!65 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.166"*, i64)* undef, !"kernel", i32 1}
!66 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.166"*, i64)* undef, !"maxntidx", i32 256}
!67 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.169"*, i64)* undef, !"kernel", i32 1}
!68 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.169"*, i64)* undef, !"maxntidx", i32 256}
!69 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.171"*, i64)* undef, !"kernel", i32 1}
!70 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.171"*, i64)* undef, !"maxntidx", i32 256}
!71 = !{void (%struct.density_diffusion_params*)* undef, !"kernel", i32 1}
!72 = !{void (%struct.forces_params*)* undef, !"kernel", i32 1}
!73 = !{void (%struct.forces_params.223*)* undef, !"kernel", i32 1}
!74 = !{void (%struct.finalize_forces_params*)* undef, !"kernel", i32 1}
!75 = !{void (%struct.forces_params.236*)* undef, !"kernel", i32 1}
!76 = !{void (%struct.forces_params.250*)* undef, !"kernel", i32 1}
!77 = !{void (%struct.forces_params.256*)* undef, !"kernel", i32 1}
!78 = !{void (%struct.finalize_forces_params.262*)* undef, !"kernel", i32 1}
!79 = !{void (%struct.neibs_interaction_params*, %struct.float4*)* undef, !"kernel", i32 1}
!80 = !{void (%struct.neibs_interaction_params*, %struct.float4*)* undef, !"maxntidx", i32 128}
!81 = !{void (%struct.neibs_interaction_params*, %struct.float4*)* undef, !"minctasm", i32 6}
!82 = !{void (%struct.neibs_interaction_params*, %struct.float3*)* undef, !"kernel", i32 1}
!83 = !{void (%"struct.cupostprocess::testpoints_params"*)* undef, !"kernel", i32 1}
!84 = !{void (%struct.neibs_interaction_params*, %struct.float4*, %struct.ushort4*)* undef, !"kernel", i32 1}
!85 = !{void (%struct.neibs_interaction_params*, %struct.float4*, %struct.ushort4*, float)* undef, !"kernel", i32 1}
!86 = !{void (%struct.reorder_params.278*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"kernel", i32 1}
!87 = !{void (%struct.reorder_params.278*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"maxntidx", i32 256}
!88 = !{void (%struct.reorder_params.278*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"minctasm", i32 6}
!89 = !{void (%struct.buildneibs_params.279*)* undef, !"kernel", i32 1}
!90 = !{void (%struct.buildneibs_params.279*)* undef, !"maxntidx", i32 256}
!91 = !{void (%struct.buildneibs_params.279*)* undef, !"minctasm", i32 5}
!92 = !{void (%struct.euler_params.282*)* undef, !"kernel", i32 1}
!93 = !{void (%struct.euler_params.285*)* undef, !"kernel", i32 1}
!94 = !{void (%struct.euler_params.288*)* undef, !"kernel", i32 1}
!95 = !{void (%struct.euler_params.291*)* undef, !"kernel", i32 1}
!96 = !{void (%struct.forces_params.294*)* undef, !"kernel", i32 1}
!97 = !{void (%struct.forces_params.300*)* undef, !"kernel", i32 1}
!98 = !{void (%struct.finalize_forces_params.305*)* undef, !"kernel", i32 1}
!99 = !{void (%struct.forces_params.312*)* undef, !"kernel", i32 1}
!100 = !{void (%struct.forces_params.319*)* undef, !"kernel", i32 1}
!101 = !{void (%struct.forces_params.325*)* undef, !"kernel", i32 1}
!102 = !{void (%struct.finalize_forces_params.331*)* undef, !"kernel", i32 1}
!103 = !{void (%"struct.cupostprocess::testpoints_params.336"*)* undef, !"kernel", i32 1}
!104 = !{void (%struct.density_diffusion_params.339*)* undef, !"kernel", i32 1}
!105 = !{void (%struct.forces_params.341*)* undef, !"kernel", i32 1}
!106 = !{void (%struct.forces_params.346*)* undef, !"kernel", i32 1}
!107 = !{void (%struct.forces_params.351*)* undef, !"kernel", i32 1}
!108 = !{void (%struct.forces_params.356*)* undef, !"kernel", i32 1}
!109 = !{void (%struct.forces_params.361*)* undef, !"kernel", i32 1}
!110 = !{void (%struct.forces_params.366*)* undef, !"kernel", i32 1}
!111 = !{void (%struct.density_diffusion_params.371*)* undef, !"kernel", i32 1}
!112 = !{void (%struct.forces_params.373*)* undef, !"kernel", i32 1}
!113 = !{void (%struct.forces_params.378*)* undef, !"kernel", i32 1}
!114 = !{void (%struct.forces_params.383*)* undef, !"kernel", i32 1}
!115 = !{void (%struct.forces_params.388*)* undef, !"kernel", i32 1}
!116 = !{void (%struct.forces_params.393*)* undef, !"kernel", i32 1}
!117 = !{void (%struct.forces_params.398*)* undef, !"kernel", i32 1}
!118 = !{void (%struct.density_diffusion_params.403*)* undef, !"kernel", i32 1}
!119 = !{void (%struct.forces_params.405*)* undef, !"kernel", i32 1}
!120 = !{void (%struct.forces_params.410*)* undef, !"kernel", i32 1}
; !121 is the only annotation among !0 .. !124 whose function operand is a real symbol rather than
; `undef`: it tags the cuforces::forcesDevice<...> instantiation taking %struct.forces_params.415*
; (the function named in the "IR Dump After ..." header of this file) with "kernel" = 1.
!121 = !{void (%struct.forces_params.415*)* @_ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_, !"kernel", i32 1}
; The remaining "kernel" annotations again carry `undef` function operands.
!122 = !{void (%struct.forces_params.420*)* undef, !"kernel", i32 1}
!123 = !{void (%struct.forces_params.425*)* undef, !"kernel", i32 1}
!124 = !{void (%struct.forces_params.430*)* undef, !"kernel", i32 1}
; ---- TBAA (type-based alias analysis) metadata ----
; Node shapes used below:
;   access tag        : !{<base type>, <access type>, i64 <byte offset>}
;   scalar type node  : !{!"<name>", <parent type>, i64 0}
;   struct type node  : !{!"<name>", <member type>, i64 <byte offset>, ...}

; Access tag attached to the vtable-pointer store (`!tbaa !125`): "vtable pointer" at offset 0.
!125 = !{!126, !126, i64 0}
; Scalar type "vtable pointer"; its parent is the TBAA root !127.
!126 = !{!"vtable pointer", !127, i64 0}
; Root node of this TBAA type tree.
!127 = !{!"Simple C++ TBAA"}
; Access tag attached to the i64 load of @_ZN7cuneibs17d_neiblist_strideE (`!tbaa !128`):
; a "long" accessed as "long" at offset 0.
!128 = !{!129, !129, i64 0}
; Scalar type "long", child of "omnipotent char".
!129 = !{!"long", !130, i64 0}
; Scalar type "omnipotent char", child of the root.
!130 = !{!"omnipotent char", !127, i64 0}
; Struct-path access tag attached to the i64 store (`!tbaa !131`): the "long" member located at
; byte offset 64 inside the struct type !132.
!131 = !{!132, !129, i64 64}
; Struct type node named _ZTSN7cuneibs22neiblist_iterator_coreE. Members as (type, byte offset):
; any pointer @0, any pointer @8, float4 @16, int3 @32, int @44, float3 @48, long @64, int @72,
; omnipotent char @76, int @80.
!132 = !{!"_ZTSN7cuneibs22neiblist_iterator_coreE", !133, i64 0, !133, i64 8, !134, i64 16, !136, i64 32, !137, i64 44, !138, i64 48, !129, i64 64, !137, i64 72, !130, i64 76, !137, i64 80}
; Scalar type "any pointer", child of "omnipotent char".
!133 = !{!"any pointer", !130, i64 0}
; Struct type node _ZTS6float4: four "float" members at byte offsets 0, 4, 8 and 12.
!134 = !{!"_ZTS6float4", !135, i64 0, !135, i64 4, !135, i64 8, !135, i64 12}
; Scalar type "float", child of "omnipotent char".
!135 = !{!"float", !130, i64 0}
; Struct type node _ZTS4int3: three "int" members at byte offsets 0, 4 and 8.
!136 = !{!"_ZTS4int3", !137, i64 0, !137, i64 4, !137, i64 8}
; Scalar type "int", child of "omnipotent char".
!137 = !{!"int", !130, i64 0}
; Struct type node _ZTS6float3: three "float" members at byte offsets 0, 4 and 8.
!138 = !{!"_ZTS6float3", !135, i64 0, !135, i64 4, !135, i64 8}
@Artem-B
Copy link
Author

Artem-B commented Oct 11, 2021

SROA debug log:

SROA function: _ZNK7cuneibs22neiblist_iterator_core10neib_indexEv
SROA function: _ZNK11pos_wrapper8fetchPosEj
SROA function: _ZN7cuneibs22neiblist_iterator_coreC2EjRK6float4RK4int3PKjPKt
SROA function: _ZN7cuneibs22neiblist_iterator_core17update_neib_indexEt
SROA function: _ZN7cuneibs12getNeibIndexERK6float4R6float3PKjtRK4int3RhRj
SROA function: _ZN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEC1EjRK6float4RK4int3PKjPKt
SROA function: _ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv
SROA function: _ZNK7cuneibs22neiblist_iterator_core6relPosERK6float4
SROA function: _ZN7cuneibs14neib_list_stepIL12ParticleType0EEEmv
SROA function: _ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_
SROA alloca:   %2 = alloca %"class.cuneibs::neiblist_iterator.1", align 16
  Rewriting FCA loads and stores...
Slices of alloca:   %2 = alloca %"class.cuneibs::neiblist_iterator.1", align 16
  [0,8) slice #0
    used by:   store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)*** %11, align 16, !tbaa !125
  [16,24) slice #1
    used by:   %19 = load i32*, i32** %18, align 16
  [24,32) slice #2 (splittable)
    used by:   store i64 %8, i64* %10, align 8
  [32,36) slice #3
    used by:   %.idx.val.i.i.i = load float, float* %28, align 16
  [40,44) slice #4
    used by:   %.idx5.val.i.i.i = load float, float* %29, align 8
  [48,52) slice #5 (splittable)
    used by:   %.idx8.val.i.i.i = load i32, i32* %37, align 16
  [52,56) slice #6 (splittable)
    used by:   %.idx9.val.i.i.i = load i32, i32* %.idx9.i.i.i, align 4
  [56,60) slice #7 (splittable)
    used by:   %.idx10.val.i.i.i = load i32, i32* %.idx10.i.i.i, align 8
  [60,64) slice #8 (splittable)
    used by:   %61 = load i32, i32* %60, align 4
  [64,68) slice #9
    used by:   store float %oldret.i.i.i.i, float* %33, align 16
  [68,72) slice #10
    used by:   store float %oldret1.i.i.i.i, float* %.sroa.215.0..sroa_idx16.i.i.i, align 4
  [72,76) slice #11
    used by:   store float %oldret3.i.i.i.i, float* %.sroa.3.0..sroa_idx17.i.i.i, align 8
  [72,76) slice #12
    used by:   %.idx.val.i = load float, float* %.idx.i, align 8
  [80,88) slice #13 (splittable)
    used by:   %58 = load i64, i64* %57, align 16
  [80,88) slice #14 (splittable)
    used by:   store i64 %59, i64* %57, align 16, !tbaa !131
  [88,92) slice #15 (splittable)
    used by:   store i32 %46, i32* %21, align 8
  [92,93) slice #16 (splittable)
    used by:   store i8 %24, i8* %20, align 4
  [96,100) slice #17 (splittable)
    used by:   %48 = load i32, i32* %47, align 16
Pre-splitting loads and stores
  Searching for candidate loads and stores
Rewriting alloca partition [0,8) to:   %.sroa.0 = alloca i32 (...)**, align 16
  rewriting [0,8) slice #0
    original:   store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)*** %11, align 16, !tbaa !125
          to:   store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)*** %.sroa.0, align 16, !tbaa !125
Rewriting alloca partition [16,24) to:   %.sroa.1 = alloca i32*, align 16
  rewriting [16,24) slice #1
    original:   %19 = load i32*, i32** %18, align 16
          to:   %.sroa.1.16. = load i32*, i32** %.sroa.1, align 16
Rewriting alloca partition [24,32) to:   %.sroa.2 = alloca i64, align 8
  rewriting [24,32) slice #2 (splittable)
    original:   store i64 %8, i64* %10, align 8
          to:   store i64 %8, i64* %.sroa.2, align 8
Rewriting alloca partition [32,36) to:   %.sroa.3 = alloca float, align 16
  rewriting [32,36) slice #3
    original:   %.idx.val.i.i.i = load float, float* %28, align 16
          to:   %.sroa.3.32..idx.val.i.i.i = load float, float* %.sroa.3, align 16
Rewriting alloca partition [40,44) to:   %.sroa.4 = alloca float, align 8
  rewriting [40,44) slice #4
    original:   %.idx5.val.i.i.i = load float, float* %29, align 8
          to:   %.sroa.4.40..idx5.val.i.i.i = load float, float* %.sroa.4, align 8
Rewriting alloca partition [48,52) to:   %.sroa.5 = alloca i32, align 16
  rewriting [48,52) slice #5 (splittable)
    original:   %.idx8.val.i.i.i = load i32, i32* %37, align 16
          to:   %.sroa.5.48.load = load i32, i32* %.sroa.5, align 16
Rewriting alloca partition [52,56) to:   %.sroa.6 = alloca i32, align 4
  rewriting [52,56) slice #6 (splittable)
    original:   %.idx9.val.i.i.i = load i32, i32* %.idx9.i.i.i, align 4
          to:   %.sroa.6.52.load = load i32, i32* %.sroa.6, align 4
Rewriting alloca partition [56,60) to:   %.sroa.7 = alloca i32, align 8
  rewriting [56,60) slice #7 (splittable)
    original:   %.idx10.val.i.i.i = load i32, i32* %.idx10.i.i.i, align 8
          to:   %.sroa.7.56.load = load i32, i32* %.sroa.7, align 8
Rewriting alloca partition [60,64) to:   %.sroa.8 = alloca i32, align 4
  rewriting [60,64) slice #8 (splittable)
    original:   %61 = load i32, i32* %60, align 4
          to:   %.sroa.8.60.load = load i32, i32* %.sroa.8, align 4
Rewriting alloca partition [64,68) to:   %.sroa.9 = alloca float, align 16
  rewriting [64,68) slice #9
    original:   store float %oldret.i.i.i.i, float* %33, align 16
          to:   store float %oldret.i.i.i.i, float* %.sroa.9, align 16
Rewriting alloca partition [68,72) to:   %.sroa.10 = alloca float, align 4
  rewriting [68,72) slice #10
    original:   store float %oldret1.i.i.i.i, float* %.sroa.215.0..sroa_idx16.i.i.i, align 4
          to:   store float %oldret1.i.i.i.i, float* %.sroa.10, align 4
Rewriting alloca partition [72,76) to:   %.sroa.11 = alloca float, align 8
  rewriting [72,76) slice #11
    original:   store float %oldret3.i.i.i.i, float* %.sroa.3.0..sroa_idx17.i.i.i, align 8
          to:   store float %oldret3.i.i.i.i, float* %.sroa.11, align 8
  rewriting [72,76) slice #12
    original:   %.idx.val.i = load float, float* %.idx.i, align 8
          to:   %.sroa.11.72..idx.val.i = load float, float* %.sroa.11, align 8
Rewriting alloca partition [80,88) to:   %.sroa.13 = alloca i64, align 16
  rewriting [80,88) slice #13 (splittable)
    original:   %58 = load i64, i64* %57, align 16
          to:   %.sroa.13.80.load = load i64, i64* %.sroa.13, align 16
  rewriting [80,88) slice #14 (splittable)
    original:   store i64 %59, i64* %57, align 16, !tbaa !131
          to:   store i64 %59, i64* %.sroa.13, align 16, !tbaa !131
Rewriting alloca partition [88,92) to:   %.sroa.15 = alloca i32, align 8
  rewriting [88,92) slice #15 (splittable)
    original:   store i32 %46, i32* %21, align 8
          to:   store i32 %46, i32* %.sroa.15, align 8
Rewriting alloca partition [92,93) to:   %.sroa.16 = alloca i8, align 4
  rewriting [92,93) slice #16 (splittable)
    original:   store i8 %24, i8* %20, align 4
          to:   store i8 %24, i8* %.sroa.16, align 4
Rewriting alloca partition [96,100) to:   %.sroa.17 = alloca i32, align 16
  rewriting [96,100) slice #17 (splittable)
    original:   %48 = load i32, i32* %47, align 16
          to:   %.sroa.17.96.load = load i32, i32* %.sroa.17, align 16
  Speculating PHIs
  Speculating Selects
Deleting dead instruction:   %48 = load i32, i32* %47, align 16
Deleting dead instruction:   %47 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 11
Deleting dead instruction:   store i8 %24, i8* %20, align 4
Deleting dead instruction:   %20 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 9
Deleting dead instruction:   store i32 %45, i32* %20, align 8
Deleting dead instruction:   %20 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 8
Deleting dead instruction:   store i64 %55, i64* %53, align 16, !tbaa !131
Deleting dead instruction:   %54 = load i64, i64* %53, align 16
Deleting dead instruction:   %53 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 7
Deleting dead instruction:   %.idx.val.i = load float, float* %.idx.i, align 8
Deleting dead instruction:   %.idx.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 5, i32 2
Deleting dead instruction:   store float %oldret3.i.i.i.i, float* %.sroa.3.0..sroa_idx17.i.i.i, align 8
Deleting dead instruction:   %.sroa.3.0..sroa_idx17.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 5, i32 2
Deleting dead instruction:   store float %oldret1.i.i.i.i, float* %.sroa.215.0..sroa_idx16.i.i.i, align 4
Deleting dead instruction:   %.sroa.215.0..sroa_idx16.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 5, i32 1
Deleting dead instruction:   store float %oldret.i.i.i.i, float* %31, align 16
Deleting dead instruction:   %31 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 5, i32 0
Deleting dead instruction:   %54 = load i32, i32* %53, align 4
Deleting dead instruction:   %53 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 4
Deleting dead instruction:   %.idx10.val.i.i.i = load i32, i32* %.idx10.i.i.i, align 8
Deleting dead instruction:   %.idx10.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 3, i32 2
Deleting dead instruction:   %.idx9.val.i.i.i = load i32, i32* %.idx9.i.i.i, align 4
Deleting dead instruction:   %.idx9.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 3, i32 1
Deleting dead instruction:   %.idx8.val.i.i.i = load i32, i32* %34, align 16
Deleting dead instruction:   %34 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 3, i32 0
Deleting dead instruction:   %.idx5.val.i.i.i = load float, float* %27, align 8
Deleting dead instruction:   %27 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 2, i32 2
Deleting dead instruction:   %.idx.val.i.i.i = load float, float* %26, align 16
Deleting dead instruction:   %26 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 2, i32 0
Deleting dead instruction:   store i64 %8, i64* %10, align 8
Deleting dead instruction:   %10 = bitcast i16** %9 to i64*
Deleting dead instruction:   %9 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 1
Deleting dead instruction:   %17 = load i32*, i32** %16, align 16
Deleting dead instruction:   %16 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 0
Deleting dead instruction:   store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)*** %9, align 16, !tbaa !125
Deleting dead instruction:   %9 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 0, i32 0
Deleting dead instruction:   %2 = alloca %"class.cuneibs::neiblist_iterator.1", align 16
Promoting allocas with mem2reg...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment