Skip to content

Instantly share code, notes, and snippets.

@Artem-B
Created October 11, 2021 20:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Artem-B/a56797a9b918d4831a303ae2869dc83f to your computer and use it in GitHub Desktop.
Save Artem-B/a56797a9b918d4831a303ae2869dc83f to your computer and use it in GitHub Desktop.
;*** IR Dump After Straight line strength reduction (slsr) *** (function: _ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_)
; ModuleID = 'reduced.ll.ll'
source_filename = "<stdin>"
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
%struct.char3 = type { i8, i8, i8 }
%struct.float4 = type { float, float, float, float }
%struct.float3 = type { float, float, float }
%struct.int3 = type { i32, i32, i32 }
%struct.forces_params.415 = type { %struct.common_forces_params }
%struct.common_forces_params = type { %struct.stage_common_forces_params.base, i16*, float, float, i32, float }
%struct.stage_common_forces_params.base = type <{ %struct.pos_info_wrapper, %struct.vel_wrapper, %struct.float4*, i32*, i32*, i32, i32, float }>
%struct.pos_info_wrapper = type { %struct.pos_wrapper, %struct.info_wrapper }
%struct.pos_wrapper = type { %struct.float4* }
%struct.info_wrapper = type { %struct.ushort4* }
%struct.ushort4 = type { i16, i16, i16, i16 }
%struct.vel_wrapper = type { %struct.float4* }
%"class.cuneibs::neiblist_iterator.1" = type { %"class.cuneibs::neiblist_iterator_simple.base.3", [8 x i8], %"class.cuneibs::neiblist_iterator_core.base", [12 x i8] }
%"class.cuneibs::neiblist_iterator_simple.base.3" = type { i32 (...)** }
%"class.cuneibs::neiblist_iterator_core.base" = type <{ i32*, i16*, %struct.float4, %struct.int3, i32, %struct.float3, [4 x i8], i64, i32, i8, [3 x i8], i32 }>
%struct.cell_params = type { %struct.cellStart_wrapper, %struct.cellEnd_wrapper }
%struct.cellStart_wrapper = type { i32* }
%struct.cellEnd_wrapper = type { i32* }
%struct.uint4 = type { i32, i32, i32, i32 }
%struct.jacobi_update_params = type <{ %struct.info_wrapper, %struct.float4*, float*, float*, i32, [4 x i8] }>
%struct.sa_outgoing_bc_params = type { %struct.neibs_list_params.base, %struct.vel_wrapper, %struct.boundelements_wrapper, %struct.vertPos_params, %struct.uint4*, %struct.float4* }
%struct.neibs_list_params.base = type <{ %struct.pos_info_wrapper, i32*, i32*, i16*, i32, float, float }>
%struct.boundelements_wrapper = type { %struct.float4* }
%struct.vertPos_params = type { %struct.float2.0*, %struct.float2.0*, %struct.float2.0* }
%struct.float2.0 = type { float, float }
%struct.reorder_params = type { %struct.reorder_data, %struct.reorder_data.9, %struct.reorder_data.30 }
%struct.reorder_data = type { %struct.float4*, %struct.float4* }
%struct.reorder_data.9 = type { %struct.float4*, %struct.float4* }
%struct.reorder_data.30 = type { %struct.float4*, %struct.float4* }
%"class.thrust::zip_iterator" = type { %"class.thrust::tuple.32" }
%"class.thrust::tuple.32" = type { %"struct.thrust::detail::cons.33" }
%"struct.thrust::detail::cons.33" = type { %"class.thrust::device_ptr", %"struct.thrust::detail::cons.35" }
%"class.thrust::device_ptr" = type { %"class.thrust::pointer" }
%"class.thrust::pointer" = type { %"class.thrust::iterator_adaptor" }
%"class.thrust::iterator_adaptor" = type { i32* }
%"struct.thrust::detail::cons.35" = type { %"class.thrust::device_ptr.36" }
%"class.thrust::device_ptr.36" = type { %"class.thrust::pointer.37" }
%"class.thrust::pointer.37" = type { %"class.thrust::iterator_adaptor.38" }
%"class.thrust::iterator_adaptor.38" = type { %struct.ushort4* }
%"class.thrust::tuple" = type { %"struct.thrust::detail::cons" }
%"struct.thrust::detail::cons" = type { i32, [4 x i8], %"struct.thrust::detail::cons.31" }
%"struct.thrust::detail::cons.31" = type { %struct.ushort4 }
%struct.ptype_hash_compare = type { i8 }
%"struct.thrust::cuda_cub::__transform::unary_transform_f" = type <{ %"class.thrust::device_ptr", i32*, %"struct.thrust::identity", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::identity" = type { i8 }
%"struct.thrust::cuda_cub::__transform::always_true_predicate" = type { i8 }
%"struct.thrust::cuda_cub::__transform::unary_transform_f.69" = type <{ %"class.thrust::device_ptr.36", %struct.ushort4*, %"struct.thrust::identity.70", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::identity.70" = type { i8 }
%"struct.thrust::cuda_cub::__transform::unary_transform_f.72" = type <{ i32*, %"class.thrust::device_ptr", %"struct.thrust::identity", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::cuda_cub::__transform::unary_transform_f.74" = type <{ %struct.ushort4*, %"class.thrust::device_ptr.36", %"struct.thrust::identity.70", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::cuda_cub::__transform::unary_transform_f.79" = type <{ %"class.thrust::device_ptr.36", %"class.thrust::device_ptr.36", %"struct.thrust::identity.70", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::cuda_cub::__transform::unary_transform_f.81" = type <{ %"class.thrust::device_ptr", %"class.thrust::device_ptr", %"struct.thrust::identity", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%struct.buildneibs_params = type { %struct.common_buildneibs_params, %struct.planes_buildneibs_params }
%struct.common_buildneibs_params = type { %struct.pos_info_wrapper, %struct.cell_params, i32*, i16*, i32, float }
%struct.planes_buildneibs_params = type { %struct.int4* }
%struct.int4 = type { i32, i32, i32, i32 }
%struct.euler_params = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.common_euler_params.base = type <{ %struct.Pos_params, %struct.Vel_params, i32*, %struct.ushort4*, %struct.float4*, i32, float, float }>
%struct.Pos_params = type { %struct.float4*, %struct.float4* }
%struct.Vel_params = type { %struct.float4*, %struct.float4* }
%struct.dummy_euler_params = type { %struct.float4* }
%struct.euler_params.111 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.euler_params.114 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.euler_params.117 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%"struct.cub::ReduceByKeyScanTileState" = type { %"struct.cub::ScanTileState" }
%"struct.cub::ScanTileState" = type { i8*, %"struct.cub::KeyValuePair"*, %"struct.cub::KeyValuePair"* }
%"struct.cub::KeyValuePair" = type { i32, [12 x i8], %struct.float4 }
%"class.thrust::device_ptr.120" = type { %"class.thrust::pointer.121" }
%"class.thrust::pointer.121" = type { %"class.thrust::iterator_adaptor.122" }
%"class.thrust::iterator_adaptor.122" = type { i32* }
%"class.thrust::device_ptr.124" = type { %"class.thrust::pointer.125" }
%"class.thrust::pointer.125" = type { %"class.thrust::iterator_adaptor.126" }
%"class.thrust::iterator_adaptor.126" = type { %struct.float4* }
%"struct.thrust::equal_to" = type { i8 }
%"struct.thrust::plus" = type { i8 }
%"struct.thrust::cuda_cub::__scan_by_key::DoNothing" = type { i8 }
%"struct.thrust::cuda_cub::__transform::unary_transform_f.166" = type <{ %struct.float4*, %"class.thrust::device_ptr.124", %"struct.thrust::identity.167", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::identity.167" = type { i8 }
%"struct.thrust::cuda_cub::__transform::unary_transform_f.169" = type <{ %"class.thrust::device_ptr.120", i32*, %"struct.thrust::identity", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%"struct.thrust::cuda_cub::__transform::unary_transform_f.171" = type <{ %"class.thrust::device_ptr.124", %struct.float4*, %"struct.thrust::identity.167", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }>
%struct.density_diffusion_params = type { %struct.common_density_diffusion_params.base, [4 x i8] }
%struct.common_density_diffusion_params.base = type <{ %struct.float4*, %struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i32*, i16*, i32, float, float, float, float }>
%struct.forces_params = type { %struct.common_forces_params }
%struct.forces_params.223 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.dummy_boundary_forces_params = type { %struct.float4* }
%struct.finalize_forces_params = type { %struct.common_finalize_forces_params, %struct.planes_forces_params, %struct.dyndt_finalize_forces_params }
%struct.common_finalize_forces_params = type { %struct.stage_common_forces_params.base, %struct.float4*, float, %struct.float4*, %struct.float4* }
%struct.planes_forces_params = type { %struct.int4* }
%struct.dyndt_finalize_forces_params = type { float*, float*, float*, i32, i32 }
%struct.forces_params.236 = type { %struct.common_forces_params }
%struct.forces_params.250 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.256 = type { %struct.common_forces_params }
%struct.finalize_forces_params.262 = type { %struct.common_finalize_forces_params.263, %struct.planes_forces_params, %struct.dyndt_finalize_forces_params }
%struct.common_finalize_forces_params.263 = type { %struct.stage_common_forces_params.base, %struct.float4*, %struct.float4*, %struct.float4* }
%struct.neibs_interaction_params = type { %struct.neibs_list_params.base, %struct.vel_wrapper }
%"struct.cupostprocess::testpoints_params" = type { %struct.neibs_interaction_params, %struct.float4* }
%struct.reorder_params.278 = type { %struct.reorder_data, %struct.reorder_data.9, %struct.reorder_data.30 }
%struct.buildneibs_params.279 = type { %struct.common_buildneibs_params }
%struct.euler_params.282 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.euler_params.285 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.euler_params.288 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.euler_params.291 = type { %struct.common_euler_params.base, %struct.dummy_euler_params }
%struct.forces_params.294 = type { %struct.common_forces_params }
%struct.forces_params.300 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.finalize_forces_params.305 = type { %struct.common_finalize_forces_params, %struct.dyndt_finalize_forces_params }
%struct.forces_params.312 = type { %struct.common_forces_params }
%struct.forces_params.319 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.325 = type { %struct.common_forces_params }
%struct.finalize_forces_params.331 = type { %struct.common_finalize_forces_params.263, %struct.dyndt_finalize_forces_params }
%"struct.cupostprocess::testpoints_params.336" = type { %struct.neibs_interaction_params, %struct.float4* }
%struct.density_diffusion_params.339 = type { %struct.common_density_diffusion_params.base, [4 x i8] }
%struct.forces_params.341 = type { %struct.common_forces_params }
%struct.forces_params.346 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.351 = type { %struct.common_forces_params }
%struct.forces_params.356 = type { %struct.common_forces_params }
%struct.forces_params.361 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.366 = type { %struct.common_forces_params }
%struct.density_diffusion_params.371 = type { %struct.common_density_diffusion_params.base, [4 x i8] }
%struct.forces_params.373 = type { %struct.common_forces_params }
%struct.forces_params.378 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.383 = type { %struct.common_forces_params }
%struct.forces_params.388 = type { %struct.common_forces_params }
%struct.forces_params.393 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.398 = type { %struct.common_forces_params }
%struct.density_diffusion_params.403 = type { %struct.common_density_diffusion_params.base, [4 x i8] }
%struct.forces_params.405 = type { %struct.common_forces_params }
%struct.forces_params.410 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.420 = type { %struct.common_forces_params }
%struct.forces_params.425 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params }
%struct.forces_params.430 = type { %struct.common_forces_params }
@_ZN7cuneibs16d_cell_to_offsetE = external local_unnamed_addr addrspace(4) externally_initialized global [27 x %struct.char3], align 1
@_ZN7cuneibs17d_neiblist_strideE = external local_unnamed_addr addrspace(4) externally_initialized global i64, align 8
@_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE = linkonce_odr unnamed_addr addrspace(1) constant { [3 x i8*] } { [3 x i8*] [i8* inttoptr (i64 16 to i8*), i8* null, i8* null] }, align 8
; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
declare i1 @_ZL8isfinitef(float) local_unnamed_addr
declare %struct.float4 @_ZL11make_float4ffff(float, float) local_unnamed_addr
declare %struct.float3 @_ZL11make_float3fff(float, float) local_unnamed_addr
declare %struct.float3 @_ZmlRK5char3RK6float3() local_unnamed_addr
declare %struct.int3 @_ZL9make_int3iii(i32, i32, i32) local_unnamed_addr
define void @_ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_(%struct.forces_params.415* nocapture readonly %0) local_unnamed_addr {
%2 = addrspacecast %struct.forces_params.415* %0 to %struct.forces_params.415 addrspace(1)*
%3 = alloca %"class.cuneibs::neiblist_iterator.1", align 16
%4 = addrspacecast %"class.cuneibs::neiblist_iterator.1"* %3 to %"class.cuneibs::neiblist_iterator.1" addrspace(5)*
%5 = getelementptr inbounds %struct.forces_params.415, %struct.forces_params.415 addrspace(1)* %2, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
%6 = bitcast %struct.forces_params.415 addrspace(1)* %2 to i8* addrspace(1)*
%7 = load i8*, i8* addrspace(1)* %6, align 8
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %7, i64 undef, i1 false)
%8 = getelementptr inbounds %struct.forces_params.415, %struct.forces_params.415 addrspace(1)* %2, i64 0, i32 0, i32 1
%9 = load i16*, i16* addrspace(1)* %8, align 8
%10 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 1
store i16* %9, i16* addrspace(5)* %10, align 8
%11 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 0, i32 0
store i32 (...)** bitcast (i8** getelementptr ({ [3 x i8*] }, { [3 x i8*] }* addrspacecast ({ [3 x i8*] } addrspace(1)* @_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE to { [3 x i8*] }*), i64 0, i32 0, i64 3) to i32 (...)**), i32 (...)** addrspace(5)* %11, align 16, !tbaa !125
%12 = bitcast %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4 to i8 addrspace(5)*
%13 = getelementptr inbounds i16, i16* %9, i64 undef
%14 = load i16, i16* %13, align 2
%15 = icmp eq i16 %14, -1
br i1 %15, label %.critedge, label %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.lr.ph
_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.lr.ph: ; preds = %1
%16 = bitcast %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4 to i8* addrspace(5)*
%17 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 0
%18 = load i32*, i32* addrspace(5)* %17, align 16
%19 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 9
%20 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 8
%21 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 2, i32 0
%.idx.val.i.i.i = load float, float addrspace(5)* %21, align 16
%22 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 2, i32 2
%.idx5.val.i.i.i = load float, float addrspace(5)* %22, align 8
%23 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 5, i32 0
%.sroa.215.0..sroa_idx16.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 5, i32 1
%.sroa.3.0..sroa_idx17.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 5, i32 2
%24 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 3, i32 0
%.idx8.val.i.i.i = load i32, i32 addrspace(5)* %24, align 16
%.idx9.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 3, i32 1
%.idx9.val.i.i.i = load i32, i32 addrspace(5)* %.idx9.i.i.i, align 4
%.idx10.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 3, i32 2
%.idx10.val.i.i.i = load i32, i32 addrspace(5)* %.idx10.i.i.i, align 8
%25 = getelementptr inbounds i32, i32* %18, i64 undef
%26 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 7
%27 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 4
%28 = load i32, i32 addrspace(5)* %27, align 4
%29 = zext i32 %28 to i64
%.pre = load i8*, i8* addrspace(5)* %16, align 16
%30 = lshr i16 %14, 11
%31 = trunc i16 %30 to i8
%32 = add nsw i8 %31, -1
store i8 %32, i8 addrspace(5)* %19, align 4
%33 = tail call %struct.float3 @_ZmlRK5char3RK6float3()
%34 = extractvalue %struct.float3 %33, 0
%35 = extractvalue %struct.float3 %33, 2
%36 = fsub contract float %.idx.val.i.i.i, %34
%37 = fsub contract float %.idx5.val.i.i.i, %35
%38 = tail call %struct.float3 @_ZL11make_float3fff(float %36, float %37)
%oldret.i.i.i.i.peel = extractvalue %struct.float3 %38, 0
%oldret1.i.i.i.i.peel = extractvalue %struct.float3 %38, 1
%oldret3.i.i.i.i.peel = extractvalue %struct.float3 %38, 2
store float %oldret.i.i.i.i.peel, float addrspace(5)* %23, align 16
store float %oldret1.i.i.i.i.peel, float addrspace(5)* %.sroa.215.0..sroa_idx16.i.i.i, align 4
store float %oldret3.i.i.i.i.peel, float addrspace(5)* %.sroa.3.0..sroa_idx17.i.i.i, align 8
%39 = zext i8 %32 to i64
%40 = getelementptr inbounds [27 x %struct.char3], [27 x %struct.char3] addrspace(4)* @_ZN7cuneibs16d_cell_to_offsetE, i64 0, i64 %39
%.idx11.i.i.i.peel = getelementptr %struct.char3, %struct.char3 addrspace(4)* %40, i64 0, i32 0
%.idx11.val.i.i.i.peel = load i8, i8 addrspace(4)* %.idx11.i.i.i.peel, align 1
%.idx12.i.i.i.peel = getelementptr %struct.char3, %struct.char3 addrspace(4)* %40, i64 0, i32 1
%.idx12.val.i.i.i.peel = load i8, i8 addrspace(4)* %.idx12.i.i.i.peel, align 1
%.idx13.i.i.i.peel = getelementptr %struct.char3, %struct.char3 addrspace(4)* %40, i64 0, i32 2
%.idx13.val.i.i.i.peel = load i8, i8 addrspace(4)* %.idx13.i.i.i.peel, align 1
%41 = sext i8 %.idx11.val.i.i.i.peel to i32
%42 = add nsw i32 %.idx8.val.i.i.i, %41
%43 = sext i8 %.idx12.val.i.i.i.peel to i32
%44 = add nsw i32 %.idx9.val.i.i.i, %43
%45 = sext i8 %.idx13.val.i.i.i.peel to i32
%46 = add nsw i32 %.idx10.val.i.i.i, %45
%47 = tail call %struct.int3 @_ZL9make_int3iii(i32 %42, i32 %44, i32 %46)
%48 = load i32, i32* %25, align 4
store i32 %48, i32 addrspace(5)* %20, align 8
%49 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 11
%50 = load i32, i32 addrspace(5)* %49, align 16
%51 = load %struct.float4*, %struct.float4* addrspace(1)* %5, align 8
%52 = zext i32 %50 to i64
%53 = getelementptr inbounds %struct.float4, %struct.float4* %51, i64 %52
%54 = bitcast %struct.float4* %53 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %54, i64 undef, i1 false)
%.idx.i.peel = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1" addrspace(5)* %4, i64 0, i32 2, i32 5, i32 2
%.idx.val.i.peel = load float, float addrspace(5)* %.idx.i.peel, align 8
%.idx1.val.i.peel = load float, float* inttoptr (i64 8 to float*), align 8
%55 = fsub contract float %.idx.val.i.peel, %.idx1.val.i.peel
%56 = tail call %struct.float4 @_ZL11make_float4ffff(float %55, float undef)
%57 = tail call i1 @_ZL8isfinitef(float undef)
%58 = load i64, i64 addrspace(4)* @_ZN7cuneibs17d_neiblist_strideE, align 8, !tbaa !128
%59 = load i64, i64 addrspace(5)* %26, align 16
%60 = add i64 %59, %58
store i64 %60, i64 addrspace(5)* %26, align 16, !tbaa !131
%61 = add i64 %60, %29
%62 = getelementptr inbounds i16, i16* %9, i64 %61
%63 = load i16, i16* %62, align 2
%64 = icmp eq i16 %63, -1
br i1 %64, label %.critedge, label %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.lr.ph.peel.newph
_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.lr.ph.peel.newph: ; preds = %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.lr.ph
%65 = getelementptr i8, i8* %.pre, i64 -24
%66 = bitcast i8* %65 to i64*
br label %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit
_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit: ; preds = %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit, %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.lr.ph.peel.newph
%67 = phi i16 [ %63, %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.lr.ph.peel.newph ], [ %105, %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit ]
%68 = lshr i16 %67, 11
%69 = trunc i16 %68 to i8
%70 = add nsw i8 %69, -1
store i8 %70, i8 addrspace(5)* %19, align 4
%71 = tail call %struct.float3 @_ZmlRK5char3RK6float3()
%72 = extractvalue %struct.float3 %71, 0
%73 = extractvalue %struct.float3 %71, 2
%74 = fsub contract float %.idx.val.i.i.i, %72
%75 = fsub contract float %.idx5.val.i.i.i, %73
%76 = tail call %struct.float3 @_ZL11make_float3fff(float %74, float %75)
%oldret.i.i.i.i = extractvalue %struct.float3 %76, 0
%oldret1.i.i.i.i = extractvalue %struct.float3 %76, 1
%oldret3.i.i.i.i = extractvalue %struct.float3 %76, 2
store float %oldret.i.i.i.i, float addrspace(5)* %23, align 16
store float %oldret1.i.i.i.i, float addrspace(5)* %.sroa.215.0..sroa_idx16.i.i.i, align 4
store float %oldret3.i.i.i.i, float addrspace(5)* %.sroa.3.0..sroa_idx17.i.i.i, align 8
%77 = zext i8 %70 to i64
%78 = getelementptr inbounds [27 x %struct.char3], [27 x %struct.char3] addrspace(4)* @_ZN7cuneibs16d_cell_to_offsetE, i64 0, i64 %77
%.idx11.i.i.i = getelementptr %struct.char3, %struct.char3 addrspace(4)* %78, i64 0, i32 0
%.idx11.val.i.i.i = load i8, i8 addrspace(4)* %.idx11.i.i.i, align 1
%.idx12.i.i.i = getelementptr %struct.char3, %struct.char3 addrspace(4)* %78, i64 0, i32 1
%.idx12.val.i.i.i = load i8, i8 addrspace(4)* %.idx12.i.i.i, align 1
%.idx13.i.i.i = getelementptr %struct.char3, %struct.char3 addrspace(4)* %78, i64 0, i32 2
%.idx13.val.i.i.i = load i8, i8 addrspace(4)* %.idx13.i.i.i, align 1
%79 = sext i8 %.idx11.val.i.i.i to i32
%80 = add nsw i32 %.idx8.val.i.i.i, %79
%81 = sext i8 %.idx12.val.i.i.i to i32
%82 = add nsw i32 %.idx9.val.i.i.i, %81
%83 = sext i8 %.idx13.val.i.i.i to i32
%84 = add nsw i32 %.idx10.val.i.i.i, %83
%85 = tail call %struct.int3 @_ZL9make_int3iii(i32 %80, i32 %82, i32 %84)
%86 = load i32, i32* %25, align 4
store i32 %86, i32 addrspace(5)* %20, align 8
%87 = load i64, i64* %66, align 8
%88 = getelementptr inbounds i8, i8 addrspace(5)* %12, i64 %87
%89 = getelementptr inbounds i8, i8 addrspace(5)* %88, i64 80
%90 = bitcast i8 addrspace(5)* %89 to i32 addrspace(5)*
%91 = load i32, i32 addrspace(5)* %90, align 4
%92 = load %struct.float4*, %struct.float4* addrspace(1)* %5, align 8
%93 = zext i32 %91 to i64
%94 = getelementptr inbounds %struct.float4, %struct.float4* %92, i64 %93
%95 = bitcast %struct.float4* %94 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %95, i64 undef, i1 false)
%.idx.i = getelementptr i8, i8 addrspace(5)* %88, i64 56
%96 = bitcast i8 addrspace(5)* %.idx.i to float addrspace(5)*
%.idx.val.i = load float, float addrspace(5)* %96, align 4
%.idx1.val.i = load float, float* inttoptr (i64 8 to float*), align 8
%97 = fsub contract float %.idx.val.i, %.idx1.val.i
%98 = tail call %struct.float4 @_ZL11make_float4ffff(float %97, float undef)
%99 = tail call i1 @_ZL8isfinitef(float undef)
%100 = load i64, i64 addrspace(4)* @_ZN7cuneibs17d_neiblist_strideE, align 8, !tbaa !128
%101 = load i64, i64 addrspace(5)* %26, align 16
%102 = add i64 %101, %100
store i64 %102, i64 addrspace(5)* %26, align 16, !tbaa !131
%103 = add i64 %102, %29
%104 = getelementptr inbounds i16, i16* %9, i64 %103
%105 = load i16, i16* %104, align 2
%106 = icmp eq i16 %105, -1
br i1 %106, label %.critedge, label %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit, !llvm.loop !139
.critedge: ; preds = %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.lr.ph, %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit, %1
ret void
}
attributes #0 = { argmemonly mustprogress nofree nounwind willreturn }
!nvvm.annotations = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47, !48, !49, !50, !51, !52, !53, !54, !55, !56, !57, !57, !58, !58, !59, !59, !60, !60, !61, !62, !63, !64, !65, !66, !67, !68, !69, !70, !71, !72, !73, !74, !75, !76, !77, !78, !79, !80, !81, !79, !80, !81, !82, !83, !84, !84, !85, !85, !86, !87, !88, !89, !90, !91, !92, !92, !93, !93, !94, !94, !95, !95, !71, !96, !97, !98, !99, !100, !101, !102, !103, !84, !84, !85, !85, !104, !105, !106, !107, !104, !108, !109, !110, !111, !112, !113, !114, !111, !115, !116, !117, !118, !119, !120, !121, !118, !122, !123, !124}
!0 = !{void (i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"kernel", i32 1}
!1 = !{void (i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"maxntidx", i32 256}
!2 = !{void (i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"minctasm", i32 6}
!3 = !{void (%struct.cell_params*, i32)* undef, !"kernel", i32 1}
!4 = !{void (%struct.float4*, %struct.ushort4*, i32*, %struct.uint4*, i32*, i16*, i32, float, float)* undef, !"kernel", i32 1}
!5 = !{void (%struct.ushort4*, %struct.float4*, %struct.uint4*, i32)* undef, !"kernel", i32 1}
!6 = !{void (%struct.ushort4*, %struct.float4*, %struct.float4*, i32, i32, float)* undef, !"kernel", i32 1}
!7 = !{void (%struct.float4*, %struct.ushort4*, i32)* undef, !"kernel", i32 1}
!8 = !{void (%struct.pos_info_wrapper*, float*, i32, float)* undef, !"kernel", i32 1}
!9 = !{void (%struct.pos_info_wrapper*, float*, i32, float)* undef, !"maxntidx", i32 128}
!10 = !{void (%struct.pos_info_wrapper*, float*, i32, float)* undef, !"minctasm", i32 6}
!11 = !{void (%struct.jacobi_update_params*)* undef, !"kernel", i32 1}
!12 = !{void (%struct.jacobi_update_params*)* undef, !"maxntidx", i32 128}
!13 = !{void (%struct.jacobi_update_params*)* undef, !"minctasm", i32 6}
!14 = !{void (%struct.ushort4*, %struct.float4*, %struct.float4*, float*, i32)* undef, !"kernel", i32 1}
!15 = !{void (%struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i32, i32, %struct.float4*)* undef, !"kernel", i32 1}
!16 = !{void (%struct.float4*, i32, i32)* undef, !"kernel", i32 1}
!17 = !{void ()* undef, !"kernel", i32 1}
!18 = !{void (float*, %struct.float4*, i32)* undef, !"kernel", i32 1}
!19 = !{void (%struct.float4*, %struct.float4*, %struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i16*, i32*, i32, float, float)* undef, !"kernel", i32 1}
!20 = !{void (%struct.float4*, %struct.float4*, %struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i16*, i32*, i32, float, float)* undef, !"maxntidx", i32 128}
!21 = !{void (%struct.float4*, %struct.float4*, %struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i16*, i32*, i32, float, float)* undef, !"minctasm", i32 6}
!22 = !{void (%struct.sa_outgoing_bc_params*)* undef, !"kernel", i32 1}
!23 = !{void (%struct.float4*, %struct.uint4*, %struct.ushort4*, i32*, i32*, i16*, i32)* undef, !"kernel", i32 1}
!24 = !{void (%struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32)* undef, !"kernel", i32 1}
!25 = !{void (%struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32)* undef, !"maxntidx", i32 128}
!26 = !{void (%struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32)* undef, !"minctasm", i32 6}
!27 = !{void (%struct.float4*, %struct.float4*, %struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32, float)* undef, !"kernel", i32 1}
!28 = !{void (%struct.float4*, %struct.float4*, %struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32, float)* undef, !"maxntidx", i32 128}
!29 = !{void (%struct.float4*, %struct.float4*, %struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32, float)* undef, !"minctasm", i32 6}
!30 = !{void (%struct.float4*, i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"kernel", i32 1}
!31 = !{void (%struct.float4*, i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"maxntidx", i32 256}
!32 = !{void (%struct.float4*, i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"minctasm", i32 6}
!33 = !{void (%struct.reorder_params*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"kernel", i32 1}
!34 = !{void (%struct.reorder_params*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"maxntidx", i32 256}
!35 = !{void (%struct.reorder_params*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"minctasm", i32 6}
!36 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::device_ptr"*, i64, %"class.thrust::tuple"*, i32*, %struct.ptype_hash_compare*)* undef, !"kernel", i32 1}
!37 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::device_ptr"*, i64, %"class.thrust::tuple"*, i32*, %struct.ptype_hash_compare*)* undef, !"maxntidx", i32 256}
!38 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f"*, i64)* undef, !"kernel", i32 1}
!39 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f"*, i64)* undef, !"maxntidx", i32 256}
!40 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.69"*, i64)* undef, !"kernel", i32 1}
!41 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.69"*, i64)* undef, !"maxntidx", i32 256}
!42 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.72"*, i64)* undef, !"kernel", i32 1}
!43 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.72"*, i64)* undef, !"maxntidx", i32 256}
!44 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.74"*, i64)* undef, !"kernel", i32 1}
!45 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.74"*, i64)* undef, !"maxntidx", i32 256}
!46 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::tuple"*, i64, i64, i64*, %struct.ptype_hash_compare*, i64, i32)* undef, !"kernel", i32 1}
!47 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::tuple"*, i64, i64, i64*, %struct.ptype_hash_compare*, i64, i32)* undef, !"maxntidx", i32 256}
!48 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::device_ptr"*, i64, %"class.thrust::tuple"*, i32*, %struct.ptype_hash_compare*, i64*, i64)* undef, !"kernel", i32 1}
!49 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::device_ptr"*, i64, %"class.thrust::tuple"*, i32*, %struct.ptype_hash_compare*, i64*, i64)* undef, !"maxntidx", i32 256}
!50 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.79"*, i64)* undef, !"kernel", i32 1}
!51 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.79"*, i64)* undef, !"maxntidx", i32 256}
!52 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.81"*, i64)* undef, !"kernel", i32 1}
!53 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.81"*, i64)* undef, !"maxntidx", i32 256}
!54 = !{void (%struct.buildneibs_params*)* undef, !"kernel", i32 1}
!55 = !{void (%struct.buildneibs_params*)* undef, !"maxntidx", i32 256}
!56 = !{void (%struct.buildneibs_params*)* undef, !"minctasm", i32 5}
!57 = !{void (%struct.euler_params*)* undef, !"kernel", i32 1}
!58 = !{void (%struct.euler_params.111*)* undef, !"kernel", i32 1}
!59 = !{void (%struct.euler_params.114*)* undef, !"kernel", i32 1}
!60 = !{void (%struct.euler_params.117*)* undef, !"kernel", i32 1}
!61 = !{void (%"struct.cub::ReduceByKeyScanTileState"*, i64)* undef, !"kernel", i32 1}
!62 = !{void (%"struct.cub::ReduceByKeyScanTileState"*, i64)* undef, !"maxntidx", i32 128}
!63 = !{void (%"class.thrust::device_ptr.120"*, %"class.thrust::device_ptr.124"*, %"class.thrust::device_ptr.124"*, %"struct.thrust::equal_to"*, %"struct.thrust::plus"*, %"struct.cub::ReduceByKeyScanTileState"*, i32, %"struct.thrust::cuda_cub::__scan_by_key::DoNothing"*)* undef, !"kernel", i32 1}
!64 = !{void (%"class.thrust::device_ptr.120"*, %"class.thrust::device_ptr.124"*, %"class.thrust::device_ptr.124"*, %"struct.thrust::equal_to"*, %"struct.thrust::plus"*, %"struct.cub::ReduceByKeyScanTileState"*, i32, %"struct.thrust::cuda_cub::__scan_by_key::DoNothing"*)* undef, !"maxntidx", i32 256}
!65 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.166"*, i64)* undef, !"kernel", i32 1}
!66 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.166"*, i64)* undef, !"maxntidx", i32 256}
!67 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.169"*, i64)* undef, !"kernel", i32 1}
!68 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.169"*, i64)* undef, !"maxntidx", i32 256}
!69 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.171"*, i64)* undef, !"kernel", i32 1}
!70 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.171"*, i64)* undef, !"maxntidx", i32 256}
!71 = !{void (%struct.density_diffusion_params*)* undef, !"kernel", i32 1}
!72 = !{void (%struct.forces_params*)* undef, !"kernel", i32 1}
!73 = !{void (%struct.forces_params.223*)* undef, !"kernel", i32 1}
!74 = !{void (%struct.finalize_forces_params*)* undef, !"kernel", i32 1}
!75 = !{void (%struct.forces_params.236*)* undef, !"kernel", i32 1}
!76 = !{void (%struct.forces_params.250*)* undef, !"kernel", i32 1}
!77 = !{void (%struct.forces_params.256*)* undef, !"kernel", i32 1}
!78 = !{void (%struct.finalize_forces_params.262*)* undef, !"kernel", i32 1}
!79 = !{void (%struct.neibs_interaction_params*, %struct.float4*)* undef, !"kernel", i32 1}
!80 = !{void (%struct.neibs_interaction_params*, %struct.float4*)* undef, !"maxntidx", i32 128}
!81 = !{void (%struct.neibs_interaction_params*, %struct.float4*)* undef, !"minctasm", i32 6}
!82 = !{void (%struct.neibs_interaction_params*, %struct.float3*)* undef, !"kernel", i32 1}
!83 = !{void (%"struct.cupostprocess::testpoints_params"*)* undef, !"kernel", i32 1}
!84 = !{void (%struct.neibs_interaction_params*, %struct.float4*, %struct.ushort4*)* undef, !"kernel", i32 1}
!85 = !{void (%struct.neibs_interaction_params*, %struct.float4*, %struct.ushort4*, float)* undef, !"kernel", i32 1}
!86 = !{void (%struct.reorder_params.278*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"kernel", i32 1}
!87 = !{void (%struct.reorder_params.278*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"maxntidx", i32 256}
!88 = !{void (%struct.reorder_params.278*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"minctasm", i32 6}
!89 = !{void (%struct.buildneibs_params.279*)* undef, !"kernel", i32 1}
!90 = !{void (%struct.buildneibs_params.279*)* undef, !"maxntidx", i32 256}
!91 = !{void (%struct.buildneibs_params.279*)* undef, !"minctasm", i32 5}
!92 = !{void (%struct.euler_params.282*)* undef, !"kernel", i32 1}
!93 = !{void (%struct.euler_params.285*)* undef, !"kernel", i32 1}
!94 = !{void (%struct.euler_params.288*)* undef, !"kernel", i32 1}
!95 = !{void (%struct.euler_params.291*)* undef, !"kernel", i32 1}
!96 = !{void (%struct.forces_params.294*)* undef, !"kernel", i32 1}
!97 = !{void (%struct.forces_params.300*)* undef, !"kernel", i32 1}
!98 = !{void (%struct.finalize_forces_params.305*)* undef, !"kernel", i32 1}
!99 = !{void (%struct.forces_params.312*)* undef, !"kernel", i32 1}
!100 = !{void (%struct.forces_params.319*)* undef, !"kernel", i32 1}
!101 = !{void (%struct.forces_params.325*)* undef, !"kernel", i32 1}
!102 = !{void (%struct.finalize_forces_params.331*)* undef, !"kernel", i32 1}
!103 = !{void (%"struct.cupostprocess::testpoints_params.336"*)* undef, !"kernel", i32 1}
!104 = !{void (%struct.density_diffusion_params.339*)* undef, !"kernel", i32 1}
!105 = !{void (%struct.forces_params.341*)* undef, !"kernel", i32 1}
!106 = !{void (%struct.forces_params.346*)* undef, !"kernel", i32 1}
!107 = !{void (%struct.forces_params.351*)* undef, !"kernel", i32 1}
!108 = !{void (%struct.forces_params.356*)* undef, !"kernel", i32 1}
!109 = !{void (%struct.forces_params.361*)* undef, !"kernel", i32 1}
!110 = !{void (%struct.forces_params.366*)* undef, !"kernel", i32 1}
!111 = !{void (%struct.density_diffusion_params.371*)* undef, !"kernel", i32 1}
!112 = !{void (%struct.forces_params.373*)* undef, !"kernel", i32 1}
!113 = !{void (%struct.forces_params.378*)* undef, !"kernel", i32 1}
!114 = !{void (%struct.forces_params.383*)* undef, !"kernel", i32 1}
!115 = !{void (%struct.forces_params.388*)* undef, !"kernel", i32 1}
!116 = !{void (%struct.forces_params.393*)* undef, !"kernel", i32 1}
!117 = !{void (%struct.forces_params.398*)* undef, !"kernel", i32 1}
!118 = !{void (%struct.density_diffusion_params.403*)* undef, !"kernel", i32 1}
!119 = !{void (%struct.forces_params.405*)* undef, !"kernel", i32 1}
!120 = !{void (%struct.forces_params.410*)* undef, !"kernel", i32 1}
!121 = !{void (%struct.forces_params.415*)* @_ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_, !"kernel", i32 1}
!122 = !{void (%struct.forces_params.420*)* undef, !"kernel", i32 1}
!123 = !{void (%struct.forces_params.425*)* undef, !"kernel", i32 1}
!124 = !{void (%struct.forces_params.430*)* undef, !"kernel", i32 1}
!125 = !{!126, !126, i64 0}
!126 = !{!"vtable pointer", !127, i64 0}
!127 = !{!"Simple C++ TBAA"}
!128 = !{!129, !129, i64 0}
!129 = !{!"long", !130, i64 0}
!130 = !{!"omnipotent char", !127, i64 0}
!131 = !{!132, !129, i64 64}
!132 = !{!"_ZTSN7cuneibs22neiblist_iterator_coreE", !133, i64 0, !133, i64 8, !134, i64 16, !136, i64 32, !137, i64 44, !138, i64 48, !129, i64 64, !137, i64 72, !130, i64 76, !137, i64 80}
!133 = !{!"any pointer", !130, i64 0}
!134 = !{!"_ZTS6float4", !135, i64 0, !135, i64 4, !135, i64 8, !135, i64 12}
!135 = !{!"float", !130, i64 0}
!136 = !{!"_ZTS4int3", !137, i64 0, !137, i64 4, !137, i64 8}
!137 = !{!"int", !130, i64 0}
!138 = !{!"_ZTS6float3", !135, i64 0, !135, i64 4, !135, i64 8}
!139 = distinct !{!139, !140}
!140 = !{!"llvm.loop.peeled.count", i32 1}
@Artem-B
Copy link
Author

Artem-B commented Oct 11, 2021

bin/opt -sroa -S -debug-only=sroa < head/ir/head-01076.ll

SROA fails with:

SROA function: _ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_
SROA alloca:   %3 = alloca %"class.cuneibs::neiblist_iterator.1", align 16
  Rewriting FCA loads and stores...
Can't analyze slices for alloca:   %3 = alloca %"class.cuneibs::neiblist_iterator.1", align 16
  A pointer to this alloca escaped by:
    %91 = load i32, i32 addrspace(5)* %90, align 4

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment