Created
October 11, 2021 20:34
-
-
Save Artem-B/0e8786afff6e6838b5cf5a9e21851b5c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;*** IR Dump After Combine redundant instructions *** (function: _ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_) | |
; Module header. The triple below selects the 64-bit NVPTX (CUDA) target, and
; source_filename records that the module text was read from standard input.
; NOTE(review): the ModuleID 'reduced.ll.ll' suggests this is the output of a
; test-case reducer, which would explain the many `undef` operands and trimmed
; call signatures further down -- confirm against the original bug report.
; ModuleID = 'reduced.ll.ll' | |
source_filename = "<stdin>" | |
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" | |
target triple = "nvptx64-nvidia-cuda" | |
; Struct layouts used by the neighbor-list iterator functions defined below.
%struct.char3 = type { i8, i8, i8 } | |
; Packed (<{ ... }>) iterator core, so fields sit back to back with no implicit
; padding. With 8-byte pointers the byte offsets of the indexed fields are:
;   0: i32* @0    1: i16* @8     2: float4 @16   3: int3 @32    4: i32 @44
;   5: float3 @48 6: [4 x i8] @60 7: i64 @64     8: i32 @72     9: i8 @76
;  10: [3 x i8] @77  11: i32 @80  12: [12 x i8] @84 (explicit tail padding)
%"class.cuneibs::neiblist_iterator_core" = type <{ i32*, i16*, %struct.float4, %struct.int3, i32, %struct.float3, [4 x i8], i64, i32, i8, [3 x i8], i32, [12 x i8] }> | |
%struct.float4 = type { float, float, float, float } | |
%struct.int3 = type { i32, i32, i32 } | |
%struct.float3 = type { float, float, float } | |
%struct.pos_wrapper = type { %struct.float4* } | |
; Complete iterator object: an 8-byte vptr sub-object, 8 bytes of padding, then
; the core ".base" sub-object starting at byte offset 16, then tail padding.
%"class.cuneibs::neiblist_iterator.1" = type { %"class.cuneibs::neiblist_iterator_simple.base.3", [8 x i8], %"class.cuneibs::neiblist_iterator_core.base", [12 x i8] } | |
%"class.cuneibs::neiblist_iterator_simple.base.3" = type { i32 (...)** } | |
; Same field list and offsets as neiblist_iterator_core above, minus the
; trailing [12 x i8] padding (the form used when embedded in another object).
%"class.cuneibs::neiblist_iterator_core.base" = type <{ i32*, i16*, %struct.float4, %struct.int3, i32, %struct.float3, [4 x i8], i64, i32, i8, [3 x i8], i32 }> | |
; Same shape as neiblist_iterator.1, with the vptr spelled out directly as field 0.
%"class.cuneibs::neiblist_iterator_simple.2" = type { i32 (...)**, [8 x i8], %"class.cuneibs::neiblist_iterator_core.base", [12 x i8] } | |
; Parameter bundle taken (by pointer) by the forcesDevice kernel defined later in
; this module. It wraps a single common_forces_params.
%struct.forces_params.415 = type { %struct.common_forces_params } | |
; Field 1 (the i16*) is the pointer the kernel copies into the iterator's core
; sub-object before entering its loop.
%struct.common_forces_params = type { %struct.stage_common_forces_params.base, i16*, float, float, i32, float } | |
; Packed: field 0 is the pos/info wrapper pair, whose first member is the
; float4* position pointer (it therefore lives at byte offset 0 of the bundle).
%struct.stage_common_forces_params.base = type <{ %struct.pos_info_wrapper, %struct.vel_wrapper, %struct.float4*, i32*, i32*, i32, i32, float }> | |
%struct.pos_info_wrapper = type { %struct.pos_wrapper, %struct.info_wrapper } | |
%struct.info_wrapper = type { %struct.ushort4* } | |
%struct.ushort4 = type { i16, i16, i16, i16 } | |
%struct.vel_wrapper = type { %struct.float4* } | |
%struct.cell_params = type { %struct.cellStart_wrapper, %struct.cellEnd_wrapper } | |
%struct.cellStart_wrapper = type { i32* } | |
%struct.cellEnd_wrapper = type { i32* } | |
%struct.uint4 = type { i32, i32, i32, i32 } | |
%struct.jacobi_update_params = type <{ %struct.info_wrapper, %struct.float4*, float*, float*, i32, [4 x i8] }> | |
%struct.sa_outgoing_bc_params = type { %struct.neibs_list_params.base, %struct.vel_wrapper, %struct.boundelements_wrapper, %struct.vertPos_params, %struct.uint4*, %struct.float4* } | |
%struct.neibs_list_params.base = type <{ %struct.pos_info_wrapper, i32*, i32*, i16*, i32, float, float }> | |
%struct.boundelements_wrapper = type { %struct.float4* } | |
%struct.vertPos_params = type { %struct.float2.0*, %struct.float2.0*, %struct.float2.0* } | |
%struct.float2.0 = type { float, float } | |
%struct.reorder_params = type { %struct.reorder_data, %struct.reorder_data.9, %struct.reorder_data.30 } | |
%struct.reorder_data = type { %struct.float4*, %struct.float4* } | |
%struct.reorder_data.9 = type { %struct.float4*, %struct.float4* } | |
%struct.reorder_data.30 = type { %struct.float4*, %struct.float4* } | |
%"class.thrust::zip_iterator" = type { %"class.thrust::tuple.32" } | |
%"class.thrust::tuple.32" = type { %"struct.thrust::detail::cons.33" } | |
%"struct.thrust::detail::cons.33" = type { %"class.thrust::device_ptr", %"struct.thrust::detail::cons.35" } | |
%"class.thrust::device_ptr" = type { %"class.thrust::pointer" } | |
%"class.thrust::pointer" = type { %"class.thrust::iterator_adaptor" } | |
%"class.thrust::iterator_adaptor" = type { i32* } | |
%"struct.thrust::detail::cons.35" = type { %"class.thrust::device_ptr.36" } | |
%"class.thrust::device_ptr.36" = type { %"class.thrust::pointer.37" } | |
%"class.thrust::pointer.37" = type { %"class.thrust::iterator_adaptor.38" } | |
%"class.thrust::iterator_adaptor.38" = type { %struct.ushort4* } | |
%"class.thrust::tuple" = type { %"struct.thrust::detail::cons" } | |
%"struct.thrust::detail::cons" = type { i32, [4 x i8], %"struct.thrust::detail::cons.31" } | |
%"struct.thrust::detail::cons.31" = type { %struct.ushort4 } | |
%struct.ptype_hash_compare = type { i8 } | |
%"struct.thrust::cuda_cub::__transform::unary_transform_f" = type <{ %"class.thrust::device_ptr", i32*, %"struct.thrust::identity", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }> | |
%"struct.thrust::identity" = type { i8 } | |
%"struct.thrust::cuda_cub::__transform::always_true_predicate" = type { i8 } | |
%"struct.thrust::cuda_cub::__transform::unary_transform_f.69" = type <{ %"class.thrust::device_ptr.36", %struct.ushort4*, %"struct.thrust::identity.70", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }> | |
%"struct.thrust::identity.70" = type { i8 } | |
%"struct.thrust::cuda_cub::__transform::unary_transform_f.72" = type <{ i32*, %"class.thrust::device_ptr", %"struct.thrust::identity", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }> | |
%"struct.thrust::cuda_cub::__transform::unary_transform_f.74" = type <{ %struct.ushort4*, %"class.thrust::device_ptr.36", %"struct.thrust::identity.70", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }> | |
%"struct.thrust::cuda_cub::__transform::unary_transform_f.79" = type <{ %"class.thrust::device_ptr.36", %"class.thrust::device_ptr.36", %"struct.thrust::identity.70", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }> | |
%"struct.thrust::cuda_cub::__transform::unary_transform_f.81" = type <{ %"class.thrust::device_ptr", %"class.thrust::device_ptr", %"struct.thrust::identity", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }> | |
%struct.buildneibs_params = type { %struct.common_buildneibs_params, %struct.planes_buildneibs_params } | |
%struct.common_buildneibs_params = type { %struct.pos_info_wrapper, %struct.cell_params, i32*, i16*, i32, float } | |
%struct.planes_buildneibs_params = type { %struct.int4* } | |
%struct.int4 = type { i32, i32, i32, i32 } | |
%struct.euler_params = type { %struct.common_euler_params.base, %struct.dummy_euler_params } | |
%struct.common_euler_params.base = type <{ %struct.Pos_params, %struct.Vel_params, i32*, %struct.ushort4*, %struct.float4*, i32, float, float }> | |
%struct.Pos_params = type { %struct.float4*, %struct.float4* } | |
%struct.Vel_params = type { %struct.float4*, %struct.float4* } | |
%struct.dummy_euler_params = type { %struct.float4* } | |
%struct.euler_params.111 = type { %struct.common_euler_params.base, %struct.dummy_euler_params } | |
%struct.euler_params.114 = type { %struct.common_euler_params.base, %struct.dummy_euler_params } | |
%struct.euler_params.117 = type { %struct.common_euler_params.base, %struct.dummy_euler_params } | |
%"struct.cub::ReduceByKeyScanTileState" = type { %"struct.cub::ScanTileState" } | |
%"struct.cub::ScanTileState" = type { i8*, %"struct.cub::KeyValuePair"*, %"struct.cub::KeyValuePair"* } | |
%"struct.cub::KeyValuePair" = type { i32, [12 x i8], %struct.float4 } | |
%"class.thrust::device_ptr.120" = type { %"class.thrust::pointer.121" } | |
%"class.thrust::pointer.121" = type { %"class.thrust::iterator_adaptor.122" } | |
%"class.thrust::iterator_adaptor.122" = type { i32* } | |
%"class.thrust::device_ptr.124" = type { %"class.thrust::pointer.125" } | |
%"class.thrust::pointer.125" = type { %"class.thrust::iterator_adaptor.126" } | |
%"class.thrust::iterator_adaptor.126" = type { %struct.float4* } | |
%"struct.thrust::equal_to" = type { i8 } | |
%"struct.thrust::plus" = type { i8 } | |
%"struct.thrust::cuda_cub::__scan_by_key::DoNothing" = type { i8 } | |
%"struct.thrust::cuda_cub::__transform::unary_transform_f.166" = type <{ %struct.float4*, %"class.thrust::device_ptr.124", %"struct.thrust::identity.167", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }> | |
%"struct.thrust::identity.167" = type { i8 } | |
%"struct.thrust::cuda_cub::__transform::unary_transform_f.169" = type <{ %"class.thrust::device_ptr.120", i32*, %"struct.thrust::identity", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }> | |
%"struct.thrust::cuda_cub::__transform::unary_transform_f.171" = type <{ %"class.thrust::device_ptr.124", %struct.float4*, %"struct.thrust::identity.167", %"struct.thrust::cuda_cub::__transform::always_true_predicate", [6 x i8] }> | |
%struct.density_diffusion_params = type { %struct.common_density_diffusion_params.base, [4 x i8] } | |
%struct.common_density_diffusion_params.base = type <{ %struct.float4*, %struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i32*, i16*, i32, float, float, float, float }> | |
%struct.forces_params = type { %struct.common_forces_params } | |
%struct.forces_params.223 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params } | |
%struct.dummy_boundary_forces_params = type { %struct.float4* } | |
%struct.finalize_forces_params = type { %struct.common_finalize_forces_params, %struct.planes_forces_params, %struct.dyndt_finalize_forces_params } | |
%struct.common_finalize_forces_params = type { %struct.stage_common_forces_params.base, %struct.float4*, float, %struct.float4*, %struct.float4* } | |
%struct.planes_forces_params = type { %struct.int4* } | |
%struct.dyndt_finalize_forces_params = type { float*, float*, float*, i32, i32 } | |
%struct.forces_params.236 = type { %struct.common_forces_params } | |
%struct.forces_params.250 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params } | |
%struct.forces_params.256 = type { %struct.common_forces_params } | |
%struct.finalize_forces_params.262 = type { %struct.common_finalize_forces_params.263, %struct.planes_forces_params, %struct.dyndt_finalize_forces_params } | |
%struct.common_finalize_forces_params.263 = type { %struct.stage_common_forces_params.base, %struct.float4*, %struct.float4*, %struct.float4* } | |
%struct.neibs_interaction_params = type { %struct.neibs_list_params.base, %struct.vel_wrapper } | |
%"struct.cupostprocess::testpoints_params" = type { %struct.neibs_interaction_params, %struct.float4* } | |
%struct.reorder_params.278 = type { %struct.reorder_data, %struct.reorder_data.9, %struct.reorder_data.30 } | |
%struct.buildneibs_params.279 = type { %struct.common_buildneibs_params } | |
%struct.euler_params.282 = type { %struct.common_euler_params.base, %struct.dummy_euler_params } | |
%struct.euler_params.285 = type { %struct.common_euler_params.base, %struct.dummy_euler_params } | |
%struct.euler_params.288 = type { %struct.common_euler_params.base, %struct.dummy_euler_params } | |
%struct.euler_params.291 = type { %struct.common_euler_params.base, %struct.dummy_euler_params } | |
%struct.forces_params.294 = type { %struct.common_forces_params } | |
%struct.forces_params.300 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params } | |
%struct.finalize_forces_params.305 = type { %struct.common_finalize_forces_params, %struct.dyndt_finalize_forces_params } | |
%struct.forces_params.312 = type { %struct.common_forces_params } | |
%struct.forces_params.319 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params } | |
%struct.forces_params.325 = type { %struct.common_forces_params } | |
%struct.finalize_forces_params.331 = type { %struct.common_finalize_forces_params.263, %struct.dyndt_finalize_forces_params } | |
%"struct.cupostprocess::testpoints_params.336" = type { %struct.neibs_interaction_params, %struct.float4* } | |
%struct.density_diffusion_params.339 = type { %struct.common_density_diffusion_params.base, [4 x i8] } | |
%struct.forces_params.341 = type { %struct.common_forces_params } | |
%struct.forces_params.346 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params } | |
%struct.forces_params.351 = type { %struct.common_forces_params } | |
%struct.forces_params.356 = type { %struct.common_forces_params } | |
%struct.forces_params.361 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params } | |
%struct.forces_params.366 = type { %struct.common_forces_params } | |
%struct.density_diffusion_params.371 = type { %struct.common_density_diffusion_params.base, [4 x i8] } | |
%struct.forces_params.373 = type { %struct.common_forces_params } | |
%struct.forces_params.378 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params } | |
%struct.forces_params.383 = type { %struct.common_forces_params } | |
%struct.forces_params.388 = type { %struct.common_forces_params } | |
%struct.forces_params.393 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params } | |
%struct.forces_params.398 = type { %struct.common_forces_params } | |
%struct.density_diffusion_params.403 = type { %struct.common_density_diffusion_params.base, [4 x i8] } | |
%struct.forces_params.405 = type { %struct.common_forces_params } | |
%struct.forces_params.410 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params } | |
%struct.forces_params.420 = type { %struct.common_forces_params } | |
%struct.forces_params.425 = type { %struct.common_forces_params, %struct.dummy_boundary_forces_params } | |
%struct.forces_params.430 = type { %struct.common_forces_params } | |
; COMDAT keys with selection kind `any`, one per symbol of the same name that is
; marked `comdat` below (seven functions and the vtable constant). The two
; constructor definitions in this module are linkonce_odr but join no COMDAT.
$_ZNK7cuneibs22neiblist_iterator_core10neib_indexEv = comdat any | |
$_ZNK11pos_wrapper8fetchPosEj = comdat any | |
$_ZN7cuneibs22neiblist_iterator_core17update_neib_indexEt = comdat any | |
$_ZN7cuneibs12getNeibIndexERK6float4R6float3PKjtRK4int3RhRj = comdat any | |
$_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv = comdat any | |
$_ZNK7cuneibs22neiblist_iterator_core6relPosERK6float4 = comdat any | |
$_ZN7cuneibs14neib_list_stepIL12ParticleType0EEEmv = comdat any | |
$_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE = comdat any | |
; cuneibs::d_cell_to_offset: external, externally initialized table of 27 char3
; entries in address space 4. The functions below index it with a cell number and
; add the selected (x, y, z) bytes to an int3.
; NOTE(review): address space 4 is constant memory in the NVPTX numbering --
; confirm against the NVPTX usage guide.
@_ZN7cuneibs16d_cell_to_offsetE = external addrspace(4) externally_initialized global [27 x %struct.char3], align 1 | |
; cuneibs::d_neiblist_stride: external i64 in address space 4; read by
; neiblist_iterator_simple::next and returned verbatim by neib_list_step.
@_ZN7cuneibs17d_neiblist_strideE = external local_unnamed_addr addrspace(4) externally_initialized global i64, align 8 | |
; Vtable for cuneibs::neiblist_iterator<(ParticleType)0>: three pointer-sized
; slots, of which slot 0 holds the integer 16 and the others are null. The vptr
; stored into objects is the address of element 3 (24 bytes past the start), so
; the `vptr - 24` load in next() reads slot 0, i.e. 16 -- the byte offset of the
; core sub-object inside %"class.cuneibs::neiblist_iterator.1".
@_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* inttoptr (i64 16 to i8*), i8* null, i8* null] }, comdat, align 8 | |
; LLVM memcpy intrinsic for generic-address-space i8* operands with an i64 length:
; (dest, src, length, isvolatile). Attribute group #0 is defined outside this
; excerpt; the line below is the printer's summary of it.
; Function Attrs: argmemonly nounwind willreturn | |
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0 | |
; cuneibs::neiblist_iterator_core::neib_index() const
; Returns the address of field 11 of the iterator core %0 (the i32 at byte offset
; 80 of the packed struct). It only computes the address; nothing is loaded or
; stored.
define linkonce_odr i32* @_ZNK7cuneibs22neiblist_iterator_core10neib_indexEv(%"class.cuneibs::neiblist_iterator_core"* %0) local_unnamed_addr comdat align 2 { | |
%2 = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 11 | |
ret i32* %2 | |
} | |
; External declaration of isfinite(float) returning i1. No call to it appears in
; the function bodies visible in this part of the module.
declare i1 @_ZL8isfinitef(float) local_unnamed_addr | |
; pos_wrapper::fetchPos(unsigned int) const
; %0 is the wrapper, %1 the element index. Loads the wrapper's float4* (field 0),
; forms the address of element %1 (index zero-extended to 64 bits) and passes it
; as the *source* of a memcpy.
; NOTE(review): the memcpy destination and length are `undef`, and the function
; returns `undef` rather than the fetched value -- presumably left behind by
; test-case reduction; this body does not model the real fetch.
define linkonce_odr %struct.float4 @_ZNK11pos_wrapper8fetchPosEj(%struct.pos_wrapper* %0, i32 %1) local_unnamed_addr comdat align 2 { | |
%3 = getelementptr inbounds %struct.pos_wrapper, %struct.pos_wrapper* %0, i64 0, i32 0 | |
%4 = load %struct.float4*, %struct.float4** %3, align 8 | |
%5 = zext i32 %1 to i64 | |
%6 = getelementptr inbounds %struct.float4, %struct.float4* %4, i64 %5 | |
%7 = bitcast %struct.float4* %6 to i8* | |
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %7, i64 undef, i1 false) | |
ret %struct.float4 undef | |
} | |
; External vector constructors make_float4 / make_float3.
; NOTE(review): the mangled names encode four and three float parameters
; respectively, but both are declared here with only two -- presumably the
; reducer dropped arguments. The call sites in this module match these
; two-argument declarations.
declare %struct.float4 @_ZL11make_float4ffff(float, float) local_unnamed_addr | |
declare %struct.float3 @_ZL11make_float3fff(float, float) local_unnamed_addr | |
; cuneibs::neiblist_iterator_core base-object constructor (C2).
; The mangled name lists (unsigned, float4 const&, int3 const&, unsigned const*,
; unsigned short const*), but only `this` (%0) and the i16* (%1) remain in this
; signature. The body stores %1 into field 1 of the core and initializes nothing
; else.
define linkonce_odr void @_ZN7cuneibs22neiblist_iterator_coreC2EjRK6float4RK4int3PKjPKt(%"class.cuneibs::neiblist_iterator_core"* %0, i16* %1) unnamed_addr align 2 { | |
%3 = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 1 | |
store i16* %1, i16** %3, align 8 | |
ret void | |
} | |
; cuneibs::neiblist_iterator_core::update_neib_index(unsigned short)
; %0 is the iterator core, %1 a 16-bit neighbor-list entry. In order, the body:
;   1. stores ((%1 >> 11) - 1), truncated to i8, into field 9;
;   2. calls operator*(char3 const&, float3 const&) (with no arguments here),
;      subtracts its x and z results from the x and z of the float4 in field 2,
;      and stores the three components of make_float3(...) into field 5;
;   3. reloads field 9, uses it to index d_cell_to_offset, adds that char3
;      (sign-extended) to the int3 in field 3 and calls make_int3 on the sums;
;   4. loads an i32 through the pointer held in field 0 and stores it in field 8.
; NOTE(review): the make_int3 result is unused and the index in step 4 is
; `undef` -- presumably test-case reduction artifacts.
define linkonce_odr void @_ZN7cuneibs22neiblist_iterator_core17update_neib_indexEt(%"class.cuneibs::neiblist_iterator_core"* %0, i16 %1) local_unnamed_addr comdat align 2 { | |
; Field 0: i32* base pointer, dereferenced in step 4.
%3 = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 0 | |
%4 = load i32*, i32** %3, align 16 | |
; Addresses of field 9 (i8 cell number) and field 8 (i32 result slot).
%5 = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 9 | |
%6 = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 8 | |
; Step 1: top five bits of %1, minus one, stored as the cell number.
%7 = lshr i16 %1, 11 | |
%8 = trunc i16 %7 to i8 | |
%9 = add nsw i8 %8, -1 | |
store i8 %9, i8* %5, align 1 | |
; Step 2: only components 0 (x) and 2 (z) of the product are consumed.
%10 = tail call %struct.float3 @_ZmlRK5char3RK6float3() | |
%11 = extractvalue %struct.float3 %10, 0 | |
%12 = extractvalue %struct.float3 %10, 2 | |
%.idx.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 2, i32 0 | |
%.idx.val.i = load float, float* %.idx.i, align 4 | |
%13 = getelementptr %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 2, i32 2 | |
%.idx5.val.i = load float, float* %13, align 4 | |
%14 = fsub contract float %.idx.val.i, %11 | |
%15 = fsub contract float %.idx5.val.i, %12 | |
%16 = tail call %struct.float3 @_ZL11make_float3fff(float %14, float %15) | |
%oldret.i.i = extractvalue %struct.float3 %16, 0 | |
%oldret1.i.i = extractvalue %struct.float3 %16, 1 | |
%oldret3.i.i = extractvalue %struct.float3 %16, 2 | |
; Write the three result components into the float3 held in field 5.
%.sroa.014.0..sroa_idx.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 5, i32 0 | |
store float %oldret.i.i, float* %.sroa.014.0..sroa_idx.i, align 1 | |
%.sroa.215.0..sroa_idx16.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 5, i32 1 | |
store float %oldret1.i.i, float* %.sroa.215.0..sroa_idx16.i, align 1 | |
%.sroa.3.0..sroa_idx17.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 5, i32 2 | |
store float %oldret3.i.i, float* %.sroa.3.0..sroa_idx17.i, align 1 | |
; Step 3: the cell number is reloaded from memory (not reused from %9) and
; zero-extended before indexing the 27-entry offset table.
%17 = load i8, i8* %5, align 1 | |
%18 = zext i8 %17 to i64 | |
%19 = getelementptr inbounds [27 x %struct.char3], [27 x %struct.char3] addrspace(4)* @_ZN7cuneibs16d_cell_to_offsetE, i64 0, i64 %18 | |
; The table entry is accessed through a generic-address-space pointer.
%20 = addrspacecast %struct.char3 addrspace(4)* %19 to %struct.char3* | |
%.idx8.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 3, i32 0 | |
%.idx8.val.i = load i32, i32* %.idx8.i, align 4 | |
%.idx9.i = getelementptr %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 3, i32 1 | |
%.idx9.val.i = load i32, i32* %.idx9.i, align 4 | |
%.idx10.i = getelementptr %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 3, i32 2 | |
%.idx10.val.i = load i32, i32* %.idx10.i, align 4 | |
%.idx11.i = getelementptr %struct.char3, %struct.char3* %20, i64 0, i32 0 | |
%.idx11.val.i = load i8, i8* %.idx11.i, align 1 | |
%.idx12.i = getelementptr %struct.char3, %struct.char3* %20, i64 0, i32 1 | |
%.idx12.val.i = load i8, i8* %.idx12.i, align 1 | |
%.idx13.i = getelementptr %struct.char3, %struct.char3* %20, i64 0, i32 2 | |
%.idx13.val.i = load i8, i8* %.idx13.i, align 1 | |
; Component-wise int3 + sign-extended char3.
%21 = sext i8 %.idx11.val.i to i32 | |
%22 = add nsw i32 %.idx8.val.i, %21 | |
%23 = sext i8 %.idx12.val.i to i32 | |
%24 = add nsw i32 %.idx9.val.i, %23 | |
%25 = sext i8 %.idx13.val.i to i32 | |
%26 = add nsw i32 %.idx10.val.i, %25 | |
%27 = tail call %struct.int3 @_ZL9make_int3iii(i32 %22, i32 %24, i32 %26) | |
; Step 4: indexed load through the field-0 pointer; the index is `undef`.
%28 = getelementptr inbounds i32, i32* %4, i64 undef | |
%29 = load i32, i32* %28, align 4 | |
store i32 %29, i32* %6, align 4 | |
ret void | |
} | |
; cuneibs::getNeibIndex(float4 const&, float3&, unsigned const*, unsigned short,
;                       int3 const&, unsigned char&, unsigned int&)
; Free-function form of the sequence in neiblist_iterator_core::update_neib_index,
; with every piece of state passed explicitly:
;   %0 float4 (x and z are read)      %1 float3 (all three components written)
;   %2 i32 array that is loaded from  %3 16-bit neighbor-list entry
;   %4 int3 (read)                    %5 i8 cell number (written, then re-read)
;   %6 i32 output slot (written)
; NOTE(review): the return value, the index into %2 and the unused make_int3
; result are `undef`/dead -- presumably test-case reduction artifacts.
define linkonce_odr i32 @_ZN7cuneibs12getNeibIndexERK6float4R6float3PKjtRK4int3RhRj(%struct.float4* %0, %struct.float3* %1, i32* %2, i16 %3, %struct.int3* %4, i8* %5, i32* %6) local_unnamed_addr comdat { | |
; Cell number = (top five bits of %3) - 1, written through %5.
%8 = lshr i16 %3, 11 | |
%9 = trunc i16 %8 to i8 | |
%10 = add nsw i8 %9, -1 | |
store i8 %10, i8* %5, align 1 | |
; Only components 0 (x) and 2 (z) of the operator* result are consumed.
%11 = tail call %struct.float3 @_ZmlRK5char3RK6float3() | |
%12 = extractvalue %struct.float3 %11, 0 | |
%13 = extractvalue %struct.float3 %11, 2 | |
%.idx = getelementptr %struct.float4, %struct.float4* %0, i64 0, i32 0 | |
%.idx.val = load float, float* %.idx, align 4 | |
%14 = getelementptr %struct.float4, %struct.float4* %0, i64 0, i32 2 | |
%.idx5.val = load float, float* %14, align 4 | |
%15 = fsub contract float %.idx.val, %12 | |
%16 = fsub contract float %.idx5.val, %13 | |
%17 = tail call %struct.float3 @_ZL11make_float3fff(float %15, float %16) | |
%oldret.i = extractvalue %struct.float3 %17, 0 | |
%oldret1.i = extractvalue %struct.float3 %17, 1 | |
%oldret3.i = extractvalue %struct.float3 %17, 2 | |
; Store the three result components into the float3 at %1.
%.sroa.014.0..sroa_idx = getelementptr inbounds %struct.float3, %struct.float3* %1, i64 0, i32 0 | |
store float %oldret.i, float* %.sroa.014.0..sroa_idx, align 1 | |
%.sroa.215.0..sroa_idx16 = getelementptr inbounds %struct.float3, %struct.float3* %1, i64 0, i32 1 | |
store float %oldret1.i, float* %.sroa.215.0..sroa_idx16, align 1 | |
%.sroa.3.0..sroa_idx17 = getelementptr inbounds %struct.float3, %struct.float3* %1, i64 0, i32 2 | |
store float %oldret3.i, float* %.sroa.3.0..sroa_idx17, align 1 | |
; The cell number is reloaded through %5 (not reused from %10) before it
; indexes the 27-entry offset table.
%18 = load i8, i8* %5, align 1 | |
%19 = zext i8 %18 to i64 | |
%20 = getelementptr inbounds [27 x %struct.char3], [27 x %struct.char3] addrspace(4)* @_ZN7cuneibs16d_cell_to_offsetE, i64 0, i64 %19 | |
%21 = addrspacecast %struct.char3 addrspace(4)* %20 to %struct.char3* | |
%.idx8 = getelementptr %struct.int3, %struct.int3* %4, i64 0, i32 0 | |
%.idx8.val = load i32, i32* %.idx8, align 4 | |
%.idx9 = getelementptr %struct.int3, %struct.int3* %4, i64 0, i32 1 | |
%.idx9.val = load i32, i32* %.idx9, align 4 | |
%.idx10 = getelementptr %struct.int3, %struct.int3* %4, i64 0, i32 2 | |
%.idx10.val = load i32, i32* %.idx10, align 4 | |
%.idx11 = getelementptr %struct.char3, %struct.char3* %21, i64 0, i32 0 | |
%.idx11.val = load i8, i8* %.idx11, align 1 | |
%.idx12 = getelementptr %struct.char3, %struct.char3* %21, i64 0, i32 1 | |
%.idx12.val = load i8, i8* %.idx12, align 1 | |
%.idx13 = getelementptr %struct.char3, %struct.char3* %21, i64 0, i32 2 | |
%.idx13.val = load i8, i8* %.idx13, align 1 | |
; Component-wise int3 + sign-extended char3.
%22 = sext i8 %.idx11.val to i32 | |
%23 = add nsw i32 %.idx8.val, %22 | |
%24 = sext i8 %.idx12.val to i32 | |
%25 = add nsw i32 %.idx9.val, %24 | |
%26 = sext i8 %.idx13.val to i32 | |
%27 = add nsw i32 %.idx10.val, %26 | |
%28 = tail call %struct.int3 @_ZL9make_int3iii(i32 %23, i32 %25, i32 %27) | |
; Indexed load from %2 (index is `undef`), copied out through %6.
%29 = getelementptr inbounds i32, i32* %2, i64 undef | |
%30 = load i32, i32* %29, align 4 | |
store i32 %30, i32* %6, align 4 | |
ret i32 undef | |
} | |
; External helpers called by the neighbor-index code in this module.
; NOTE(review): operator*(char3 const&, float3 const&) is declared with an empty
; parameter list although its mangled name encodes two reference parameters --
; presumably the reducer dropped them; every call site here passes no arguments.
declare %struct.float3 @_ZmlRK5char3RK6float3() local_unnamed_addr | |
; make_int3(int, int, int): keeps all three parameters.
declare %struct.int3 @_ZL9make_int3iii(i32, i32, i32) local_unnamed_addr | |
; cuneibs::neiblist_iterator<(ParticleType)0> complete-object constructor (C1).
; As with the core constructor, only `this` (%0) and the i16* (%1) remain in the
; signature. The body performs two stores:
;   - %1 into the i16* member of the embedded core (field 2 of the object,
;     sub-field 1);
;   - the vptr into field 0: the address of element 3 of the vtable's 3-entry
;     array, i.e. 24 bytes past the start of the vtable constant.
; The !tbaa node is defined outside this excerpt.
define linkonce_odr void @_ZN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEC1EjRK6float4RK4int3PKjPKt(%"class.cuneibs::neiblist_iterator.1"* %0, i16* %1) unnamed_addr align 2 { | |
%3 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %0, i64 0, i32 2, i32 1 | |
store i16* %1, i16** %3, align 8 | |
%4 = getelementptr %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %0, i64 0, i32 0, i32 0 | |
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)*** %4, align 8, !tbaa !125 | |
ret void | |
} | |
; cuneibs::neiblist_iterator_simple<(ParticleType)0>::next()
; Advances the iterator %0 by one stride and reports whether an entry was read.
; Returns false when the 16-bit entry at the new position is all-ones (compared
; against i16 -1), true otherwise.
; NOTE(review): the all-ones value is presumably the end-of-list sentinel.
;
; The core sub-object is located at run time: an i64 is loaded from 24 bytes
; before the address held in the object's vptr and added to `this`.
; NOTE(review): this looks like an Itanium-ABI virtual-base offset lookup -- confirm.
; All later accesses are raw byte offsets from that address; they are consistent
; with the packed layout of %"class.cuneibs::neiblist_iterator_core.base":
;   +0 i32*   +8 i16*   +16/+24 float4 x/z   +32/+36/+40 int3   +44 i32
;   +48/+52/+56 float3  +64 i64              +72 i32            +76 i8
; Block %30 is the same store/compute sequence as
; neiblist_iterator_core::update_neib_index, expressed with those byte offsets.
define linkonce_odr i1 @_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv(%"class.cuneibs::neiblist_iterator_simple.2"* %0) local_unnamed_addr comdat align 2 { | |
; Stride to advance by, read from d_neiblist_stride via a generic pointer.
%2 = load i64, i64* addrspacecast (i64 addrspace(4)* @_ZN7cuneibs17d_neiblist_strideE to i64*), align 8, !tbaa !128 | |
; Load the vptr (first 8 bytes of the object) and the i64 stored 24 bytes
; before the address it points at.
%3 = bitcast %"class.cuneibs::neiblist_iterator_simple.2"* %0 to i8** | |
%4 = load i8*, i8** %3, align 8, !tbaa !125 | |
%5 = getelementptr i8, i8* %4, i64 -24 | |
%6 = bitcast i8* %5 to i64* | |
%7 = load i64, i64* %6, align 8 | |
; core + 64 (i64 position): add the stride and write it back.
%8 = bitcast %"class.cuneibs::neiblist_iterator_simple.2"* %0 to i8* | |
%9 = getelementptr inbounds i8, i8* %8, i64 %7 | |
%10 = getelementptr inbounds i8, i8* %9, i64 64 | |
%11 = bitcast i8* %10 to i64* | |
%12 = load i64, i64* %11, align 16 | |
%13 = add i64 %12, %2 | |
store i64 %13, i64* %11, align 16, !tbaa !131 | |
; The offset is loaded again from the same vtable slot after the store; %15 is
; the core base address used for the rest of the function.
%14 = load i64, i64* %6, align 8 | |
%15 = getelementptr inbounds i8, i8* %8, i64 %14 | |
; core + 8: i16* list pointer.
%16 = getelementptr inbounds i8, i8* %15, i64 8 | |
%17 = bitcast i8* %16 to i16** | |
%18 = load i16*, i16** %17, align 8 | |
; core + 64: the position that was just updated, read back from memory.
%19 = getelementptr inbounds i8, i8* %15, i64 64 | |
%20 = bitcast i8* %19 to i64* | |
%21 = load i64, i64* %20, align 16 | |
; core + 44: i32 added (zero-extended) to the position to form the list index.
%22 = getelementptr inbounds i8, i8* %15, i64 44 | |
%23 = bitcast i8* %22 to i32* | |
%24 = load i32, i32* %23, align 4 | |
%25 = zext i32 %24 to i64 | |
%26 = add i64 %21, %25 | |
; Fetch the 16-bit entry; all-ones means there is nothing to process.
%27 = getelementptr inbounds i16, i16* %18, i64 %26 | |
%28 = load i16, i16* %27, align 2 | |
%29 = icmp eq i16 %28, -1 | |
br i1 %29, label %67, label %30 | |
30: ; preds = %1 | |
; core + 0: i32* base pointer, dereferenced at the end of this block.
%31 = bitcast i8* %15 to i32** | |
%32 = load i32*, i32** %31, align 16 | |
; core + 76 (i8 cell number) and core + 72 (i32 result slot).
%33 = getelementptr inbounds i8, i8* %15, i64 76 | |
%34 = getelementptr inbounds i8, i8* %15, i64 72 | |
%35 = bitcast i8* %34 to i32* | |
; Cell number = (top five bits of the entry) - 1.
%36 = lshr i16 %28, 11 | |
%37 = trunc i16 %36 to i8 | |
%38 = add nsw i8 %37, -1 | |
store i8 %38, i8* %33, align 1 | |
; Only components 0 (x) and 2 (z) of the operator* result are consumed.
%39 = tail call %struct.float3 @_ZmlRK5char3RK6float3() | |
%40 = extractvalue %struct.float3 %39, 0 | |
%41 = extractvalue %struct.float3 %39, 2 | |
; core + 16 / core + 24: x and z of the float4.
%.idx.i.i = getelementptr inbounds i8, i8* %15, i64 16 | |
%42 = bitcast i8* %.idx.i.i to float* | |
%.idx.val.i.i = load float, float* %42, align 4 | |
%43 = getelementptr i8, i8* %15, i64 24 | |
%44 = bitcast i8* %43 to float* | |
%.idx5.val.i.i = load float, float* %44, align 4 | |
%45 = fsub contract float %.idx.val.i.i, %40 | |
%46 = fsub contract float %.idx5.val.i.i, %41 | |
%47 = tail call %struct.float3 @_ZL11make_float3fff(float %45, float %46) | |
%oldret.i.i.i = extractvalue %struct.float3 %47, 0 | |
%oldret1.i.i.i = extractvalue %struct.float3 %47, 1 | |
%oldret3.i.i.i = extractvalue %struct.float3 %47, 2 | |
; core + 48 / + 52 / + 56: store the three components of the float3.
%.sroa.014.0..sroa_idx.i.i = getelementptr inbounds i8, i8* %15, i64 48 | |
%48 = bitcast i8* %.sroa.014.0..sroa_idx.i.i to float* | |
store float %oldret.i.i.i, float* %48, align 1 | |
%.sroa.215.0..sroa_idx16.i.i = getelementptr inbounds i8, i8* %15, i64 52 | |
%49 = bitcast i8* %.sroa.215.0..sroa_idx16.i.i to float* | |
store float %oldret1.i.i.i, float* %49, align 1 | |
%.sroa.3.0..sroa_idx17.i.i = getelementptr inbounds i8, i8* %15, i64 56 | |
%50 = bitcast i8* %.sroa.3.0..sroa_idx17.i.i to float* | |
store float %oldret3.i.i.i, float* %50, align 1 | |
; Reload the cell number from memory and use it to index the offset table.
%51 = load i8, i8* %33, align 1 | |
%52 = zext i8 %51 to i64 | |
%53 = getelementptr inbounds [27 x %struct.char3], [27 x %struct.char3] addrspace(4)* @_ZN7cuneibs16d_cell_to_offsetE, i64 0, i64 %52 | |
%54 = addrspacecast %struct.char3 addrspace(4)* %53 to %struct.char3* | |
; core + 32 / + 36 / + 40: the three int3 components.
%.idx8.i.i = getelementptr inbounds i8, i8* %15, i64 32 | |
%55 = bitcast i8* %.idx8.i.i to i32* | |
%.idx8.val.i.i = load i32, i32* %55, align 4 | |
%.idx9.i.i = getelementptr i8, i8* %15, i64 36 | |
%56 = bitcast i8* %.idx9.i.i to i32* | |
%.idx9.val.i.i = load i32, i32* %56, align 4 | |
%.idx10.i.i = getelementptr i8, i8* %15, i64 40 | |
%57 = bitcast i8* %.idx10.i.i to i32* | |
%.idx10.val.i.i = load i32, i32* %57, align 4 | |
%.idx11.i.i = getelementptr %struct.char3, %struct.char3* %54, i64 0, i32 0 | |
%.idx11.val.i.i = load i8, i8* %.idx11.i.i, align 1 | |
%.idx12.i.i = getelementptr %struct.char3, %struct.char3* %54, i64 0, i32 1 | |
%.idx12.val.i.i = load i8, i8* %.idx12.i.i, align 1 | |
%.idx13.i.i = getelementptr %struct.char3, %struct.char3* %54, i64 0, i32 2 | |
%.idx13.val.i.i = load i8, i8* %.idx13.i.i, align 1 | |
; Component-wise int3 + sign-extended char3; the make_int3 result is unused.
%58 = sext i8 %.idx11.val.i.i to i32 | |
%59 = add nsw i32 %.idx8.val.i.i, %58 | |
%60 = sext i8 %.idx12.val.i.i to i32 | |
%61 = add nsw i32 %.idx9.val.i.i, %60 | |
%62 = sext i8 %.idx13.val.i.i to i32 | |
%63 = add nsw i32 %.idx10.val.i.i, %62 | |
%64 = tail call %struct.int3 @_ZL9make_int3iii(i32 %59, i32 %61, i32 %63) | |
; Indexed load through the core's i32* (index is `undef`), stored at core + 72.
%65 = getelementptr inbounds i32, i32* %32, i64 undef | |
%66 = load i32, i32* %65, align 4 | |
store i32 %66, i32* %35, align 4 | |
br label %67 | |
67: ; preds = %1, %30 | |
; true when arriving from %30 (an entry was processed), false from the entry block.
%.0 = phi i1 [ true, %30 ], [ false, %1 ] | |
ret i1 %.0 | |
} | |
; cuneibs::neiblist_iterator_core::relPos(float4 const&) const
; %0 is the iterator core, %1 the float4 argument. Computes
; (component 2 of the float3 in field 5 of the core) - (component 2 of %1)
; and passes the difference to make_float4.
; NOTE(review): the second make_float4 operand is `undef`, its result is
; discarded and the function returns `undef` -- presumably test-case reduction
; artifacts; only the z-component subtraction survives.
define linkonce_odr %struct.float4 @_ZNK7cuneibs22neiblist_iterator_core6relPosERK6float4(%"class.cuneibs::neiblist_iterator_core"* %0, %struct.float4* %1) local_unnamed_addr comdat align 2 { | |
%.idx = getelementptr %"class.cuneibs::neiblist_iterator_core", %"class.cuneibs::neiblist_iterator_core"* %0, i64 0, i32 5, i32 2 | |
%.idx.val = load float, float* %.idx, align 4 | |
%.idx1 = getelementptr %struct.float4, %struct.float4* %1, i64 0, i32 2 | |
%.idx1.val = load float, float* %.idx1, align 8 | |
%3 = fsub contract float %.idx.val, %.idx1.val | |
%4 = tail call %struct.float4 @_ZL11make_float4ffff(float %3, float undef) | |
ret %struct.float4 undef | |
} | |
; unsigned long cuneibs::neib_list_step<(ParticleType)0>()
; Returns the current value of cuneibs::d_neiblist_stride. The address-space-4
; global is read through a generic pointer obtained with a constant
; addrspacecast. The !tbaa node is defined outside this excerpt.
define linkonce_odr i64 @_ZN7cuneibs14neib_list_stepIL12ParticleType0EEEmv() local_unnamed_addr comdat { | |
%1 = load i64, i64* addrspacecast (i64 addrspace(4)* @_ZN7cuneibs17d_neiblist_strideE to i64*), align 8, !tbaa !128 | |
ret i64 %1 | |
} | |
; cuforces::forcesDevice<forces_params<...>, ...>(forces_params) as printed by this
; pass dump. Takes one pointer to %struct.forces_params.415 (nocapture, readonly)
; and returns void.
;
; Control-flow shape:
;   entry  - allocates a cuneibs::neiblist_iterator on the stack, copies the i16*
;            field of the params into it, stores the vtable pointer, then loads
;            one i16 entry; if that entry is -1 the function returns immediately.
;   %16    - single-block loop; each trip decodes the current i16 entry, updates
;            several iterator fields, advances the i64 field 7 by the global
;            stride and loads the next i16 entry; the loop ends when an entry
;            equal to -1 is loaded.
;   exit   - ret void.
;
; Iterator field indices used below refer to element 2 of the iterator struct
; (%"class.cuneibs::neiblist_iterator_core.base"):
;   0 i32*, 1 i16*, 2 float4, 3 int3, 4 i32, 5 float3, 7 i64, 8 i32, 9 i8, 11 i32.
;
; NOTE(review): several operands are `undef`, several call results are never
; used, and iterator fields 0, 2, 3, 4, 7 (first trip) and 11 are loaded with no
; store to them visible in this function. Presumably this is a consequence of
; test-case reduction - confirm against the unreduced IR before drawing
; conclusions about the original program.
define void @_ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_(%struct.forces_params.415* nocapture readonly %0) local_unnamed_addr { | |
; --- entry block ---
; %2: stack storage for the neighbor-list iterator object.
%2 = alloca %"class.cuneibs::neiblist_iterator.1", align 16 | |
; %3: address of the innermost first member of the params (later loaded as a
; %struct.float4*).
%3 = getelementptr inbounds %struct.forces_params.415, %struct.forces_params.415* %0, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0 | |
; %5: the first pointer-sized word of the params, reinterpreted as i8*.
%4 = bitcast %struct.forces_params.415* %0 to i8** | |
%5 = load i8*, i8** %4, align 8 | |
; memcpy from %5 with an undef destination and an undef length.
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %5, i64 undef, i1 false) | |
; %8: the i16* member of the params (field 0,1), loaded as a raw i64.
%6 = getelementptr inbounds %struct.forces_params.415, %struct.forces_params.415* %0, i64 0, i32 0, i32 1 | |
%7 = bitcast i16** %6 to i64* | |
%8 = load i64, i64* %7, align 8 | |
; Copy that pointer value into iterator field 1 (the i16* slot).
%9 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 1 | |
%10 = bitcast i16** %9 to i64* | |
store i64 %8, i64* %10, align 8 | |
; Install the vtable pointer of cuneibs::neiblist_iterator<(ParticleType)0> in
; the iterator's first word (TBAA tag !125, "vtable pointer").
%11 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 0, i32 0 | |
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN7cuneibs17neiblist_iteratorIJL12ParticleType0EEEE, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)*** %11, align 16, !tbaa !125 | |
; %12: the same pointer value as a typed i16*; it is the base for every i16
; entry load in this function.
%12 = inttoptr i64 %8 to i16* | |
; First entry load; the element index is undef.
%13 = getelementptr inbounds i16, i16* %12, i64 undef | |
%14 = load i16, i16* %13, align 2 | |
; An entry of -1 (0xFFFF) skips the loop entirely.
%15 = icmp eq i16 %14, -1 | |
br i1 %15, label %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit, label %.lr.ph | |
; Loop preheader.
.lr.ph: ; preds = %1 | |
br label %16 | |
; Dedicated loop-exit block; only forwards to the common exit.
_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.loopexit: ; preds = %16 | |
br label %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit | |
; Common exit.
_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit: ; preds = %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.loopexit, %1 | |
ret void | |
; --- loop block (header, body and latch in one block) ---
16: ; preds = %.lr.ph, %16 | |
; %17: the current i16 entry - %14 on the first trip, %65 afterwards.
%17 = phi i16 [ %14, %.lr.ph ], [ %65, %16 ] | |
; %19: iterator field 0 (i32*). No store to this slot is visible in this function.
%18 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 0 | |
%19 = load i32*, i32** %18, align 16 | |
; %20 / %21: addresses of iterator field 9 (i8) and field 8 (i32).
%20 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 9 | |
%21 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 8 | |
; Take the top 5 bits of the entry (entry >> 11), subtract 1, store in field 9.
%22 = lshr i16 %17, 11 | |
%23 = trunc i16 %22 to i8 | |
%24 = add nsw i8 %23, -1 | |
store i8 %24, i8* %20, align 4 | |
; Call to operator*(const char3&, const float3&) (demangled); no arguments are
; passed at this call site. Only components 0 and 2 of the result are used.
%25 = tail call %struct.float3 @_ZmlRK5char3RK6float3() | |
%26 = extractvalue %struct.float3 %25, 0 | |
%27 = extractvalue %struct.float3 %25, 2 | |
; Load components 0 and 2 of iterator field 2 (float4); no store to that field
; is visible in this function.
%28 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 2, i32 0 | |
%.idx.val.i.i.i = load float, float* %28, align 16 | |
%29 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 2, i32 2 | |
%.idx5.val.i.i.i = load float, float* %29, align 8 | |
; Component-wise difference: field2[0] - %26 and field2[2] - %27.
%30 = fsub contract float %.idx.val.i.i.i, %26 | |
%31 = fsub contract float %.idx5.val.i.i.i, %27 | |
; make_float3 is called with two float operands here; all three components of
; its result are written to iterator field 5 (float3).
%32 = tail call %struct.float3 @_ZL11make_float3fff(float %30, float %31) | |
%oldret.i.i.i.i = extractvalue %struct.float3 %32, 0 | |
%oldret1.i.i.i.i = extractvalue %struct.float3 %32, 1 | |
%oldret3.i.i.i.i = extractvalue %struct.float3 %32, 2 | |
%33 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 5, i32 0 | |
store float %oldret.i.i.i.i, float* %33, align 16 | |
%.sroa.215.0..sroa_idx16.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 5, i32 1 | |
store float %oldret1.i.i.i.i, float* %.sroa.215.0..sroa_idx16.i.i.i, align 4 | |
%.sroa.3.0..sroa_idx17.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 5, i32 2 | |
store float %oldret3.i.i.i.i, float* %.sroa.3.0..sroa_idx17.i.i.i, align 8 | |
; Use the decoded value %24 (zero-extended) as an index into the 27-element
; char3 table @_ZN7cuneibs16d_cell_to_offsetE in addrspace(4); the element
; pointer is cast to a generic pointer before its bytes are read.
%34 = zext i8 %24 to i64 | |
%35 = getelementptr inbounds [27 x %struct.char3], [27 x %struct.char3] addrspace(4)* @_ZN7cuneibs16d_cell_to_offsetE, i64 0, i64 %34 | |
%36 = addrspacecast %struct.char3 addrspace(4)* %35 to %struct.char3* | |
; Load the three i32 components of iterator field 3 (int3); no store to that
; field is visible in this function.
%37 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 3, i32 0 | |
%.idx8.val.i.i.i = load i32, i32* %37, align 16 | |
%.idx9.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 3, i32 1 | |
%.idx9.val.i.i.i = load i32, i32* %.idx9.i.i.i, align 4 | |
%.idx10.i.i.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 3, i32 2 | |
%.idx10.val.i.i.i = load i32, i32* %.idx10.i.i.i, align 8 | |
; Load the three i8 components of the selected char3 table entry.
%.idx11.i.i.i = getelementptr %struct.char3, %struct.char3* %36, i64 0, i32 0 | |
%.idx11.val.i.i.i = load i8, i8* %.idx11.i.i.i, align 1 | |
%.idx12.i.i.i = getelementptr %struct.char3, %struct.char3* %36, i64 0, i32 1 | |
%.idx12.val.i.i.i = load i8, i8* %.idx12.i.i.i, align 1 | |
%.idx13.i.i.i = getelementptr %struct.char3, %struct.char3* %36, i64 0, i32 2 | |
%.idx13.val.i.i.i = load i8, i8* %.idx13.i.i.i, align 1 | |
; Sign-extend each i8 and add it to the matching int3 component.
%38 = sext i8 %.idx11.val.i.i.i to i32 | |
%39 = add nsw i32 %.idx8.val.i.i.i, %38 | |
%40 = sext i8 %.idx12.val.i.i.i to i32 | |
%41 = add nsw i32 %.idx9.val.i.i.i, %40 | |
%42 = sext i8 %.idx13.val.i.i.i to i32 | |
%43 = add nsw i32 %.idx10.val.i.i.i, %42 | |
; The make_int3 result (%44) is not used anywhere in this function.
%44 = tail call %struct.int3 @_ZL9make_int3iii(i32 %39, i32 %41, i32 %43) | |
; Load an i32 from %19 at an undef index and store it in iterator field 8.
%45 = getelementptr inbounds i32, i32* %19, i64 undef | |
%46 = load i32, i32* %45, align 4 | |
store i32 %46, i32* %21, align 8 | |
; Load iterator field 11 (i32); no store to it is visible in this function.
%47 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 11 | |
%48 = load i32, i32* %47, align 16 | |
; Index the params' float4 array (pointer read through %3) by field 11.
%49 = load %struct.float4*, %struct.float4** %3, align 8 | |
%50 = zext i32 %48 to i64 | |
%51 = getelementptr inbounds %struct.float4, %struct.float4* %49, i64 %50 | |
%52 = bitcast %struct.float4* %51 to i8* | |
; memcpy from that float4 element with an undef destination and undef length.
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %52, i64 undef, i1 false) | |
; Reload component 2 of iterator field 5 (the address stored to just above).
%.idx.i = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 5, i32 2 | |
%.idx.val.i = load float, float* %.idx.i, align 8 | |
; NOTE(review): this load reads through the constant address 8
; (inttoptr (i64 8 to float*)). Byte offset 8 is where member index 2 of
; %struct.float4 sits, so this looks like a member access folded onto a
; null/constant base pointer - verify whether this is the miscompile under
; investigation or an artifact of the reduction.
%.idx1.val.i = load float, float* inttoptr (i64 8 to float*), align 8 | |
%53 = fsub contract float %.idx.val.i, %.idx1.val.i | |
; Results of the next two calls (%54, %55) are unused; both take undef operands.
%54 = tail call %struct.float4 @_ZL11make_float4ffff(float %53, float undef) | |
%55 = tail call i1 @_ZL8isfinitef(float undef) | |
; Advance: field 7 (i64) += value of the global @_ZN7cuneibs17d_neiblist_strideE.
%56 = load i64, i64* addrspacecast (i64 addrspace(4)* @_ZN7cuneibs17d_neiblist_strideE to i64*), align 8, !tbaa !128 | |
%57 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 7 | |
%58 = load i64, i64* %57, align 16 | |
%59 = add i64 %58, %56 | |
store i64 %59, i64* %57, align 16, !tbaa !131 | |
; Next entry index = updated field 7 + zero-extended field 4 (i32); no store to
; field 4 is visible in this function.
%60 = getelementptr inbounds %"class.cuneibs::neiblist_iterator.1", %"class.cuneibs::neiblist_iterator.1"* %2, i64 0, i32 2, i32 4 | |
%61 = load i32, i32* %60, align 4 | |
%62 = zext i32 %61 to i64 | |
%63 = add i64 %59, %62 | |
; Load the next i16 entry relative to the base pointer %12.
%64 = getelementptr inbounds i16, i16* %12, i64 %63 | |
%65 = load i16, i16* %64, align 2 | |
; Latch: leave the loop when the entry is -1, otherwise run another trip.
%66 = icmp eq i16 %65, -1 | |
br i1 %66, label %_ZN7cuneibs24neiblist_iterator_simpleIL12ParticleType0EE4nextEv.exit.loopexit, label %16 | |
} | |
; Attribute group #0: argmemonly, nounwind, willreturn.
; NOTE(review): none of the function definitions shown near this line reference
; #0; presumably it is attached to a declaration elsewhere in the module (for
; example the llvm.memcpy intrinsic) - confirm.
attributes #0 = { argmemonly nounwind willreturn } | |
; Named metadata listing the NVVM annotation nodes (!0 through !124). Each listed
; node is a tuple of (function or undef, property name string, i32 value); the
; property names that occur are "kernel", "maxntidx" and "minctasm".
; Several nodes appear in this list more than once (for example !57, !79 and !84).
!nvvm.annotations = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47, !48, !49, !50, !51, !52, !53, !54, !55, !56, !57, !57, !58, !58, !59, !59, !60, !60, !61, !62, !63, !64, !65, !66, !67, !68, !69, !70, !71, !72, !73, !74, !75, !76, !77, !78, !79, !80, !81, !79, !80, !81, !82, !83, !84, !84, !85, !85, !86, !87, !88, !89, !90, !91, !92, !92, !93, !93, !94, !94, !95, !95, !71, !96, !97, !98, !99, !100, !101, !102, !103, !84, !84, !85, !85, !104, !105, !106, !107, !104, !108, !109, !110, !111, !112, !113, !114, !111, !115, !116, !117, !118, !119, !120, !121, !118, !122, !123, !124} | |
!0 = !{void (i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"kernel", i32 1} | |
!1 = !{void (i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"maxntidx", i32 256} | |
!2 = !{void (i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"minctasm", i32 6} | |
!3 = !{void (%struct.cell_params*, i32)* undef, !"kernel", i32 1} | |
!4 = !{void (%struct.float4*, %struct.ushort4*, i32*, %struct.uint4*, i32*, i16*, i32, float, float)* undef, !"kernel", i32 1} | |
!5 = !{void (%struct.ushort4*, %struct.float4*, %struct.uint4*, i32)* undef, !"kernel", i32 1} | |
!6 = !{void (%struct.ushort4*, %struct.float4*, %struct.float4*, i32, i32, float)* undef, !"kernel", i32 1} | |
!7 = !{void (%struct.float4*, %struct.ushort4*, i32)* undef, !"kernel", i32 1} | |
!8 = !{void (%struct.pos_info_wrapper*, float*, i32, float)* undef, !"kernel", i32 1} | |
!9 = !{void (%struct.pos_info_wrapper*, float*, i32, float)* undef, !"maxntidx", i32 128} | |
!10 = !{void (%struct.pos_info_wrapper*, float*, i32, float)* undef, !"minctasm", i32 6} | |
!11 = !{void (%struct.jacobi_update_params*)* undef, !"kernel", i32 1} | |
!12 = !{void (%struct.jacobi_update_params*)* undef, !"maxntidx", i32 128} | |
!13 = !{void (%struct.jacobi_update_params*)* undef, !"minctasm", i32 6} | |
!14 = !{void (%struct.ushort4*, %struct.float4*, %struct.float4*, float*, i32)* undef, !"kernel", i32 1} | |
!15 = !{void (%struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i32, i32, %struct.float4*)* undef, !"kernel", i32 1} | |
!16 = !{void (%struct.float4*, i32, i32)* undef, !"kernel", i32 1} | |
!17 = !{void ()* undef, !"kernel", i32 1} | |
!18 = !{void (float*, %struct.float4*, i32)* undef, !"kernel", i32 1} | |
!19 = !{void (%struct.float4*, %struct.float4*, %struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i16*, i32*, i32, float, float)* undef, !"kernel", i32 1} | |
!20 = !{void (%struct.float4*, %struct.float4*, %struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i16*, i32*, i32, float, float)* undef, !"maxntidx", i32 128} | |
!21 = !{void (%struct.float4*, %struct.float4*, %struct.float4*, %struct.float4*, %struct.ushort4*, i32*, i16*, i32*, i32, float, float)* undef, !"minctasm", i32 6} | |
!22 = !{void (%struct.sa_outgoing_bc_params*)* undef, !"kernel", i32 1} | |
!23 = !{void (%struct.float4*, %struct.uint4*, %struct.ushort4*, i32*, i32*, i16*, i32)* undef, !"kernel", i32 1} | |
!24 = !{void (%struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32)* undef, !"kernel", i32 1} | |
!25 = !{void (%struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32)* undef, !"maxntidx", i32 128} | |
!26 = !{void (%struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32)* undef, !"minctasm", i32 6} | |
!27 = !{void (%struct.float4*, %struct.float4*, %struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32, float)* undef, !"kernel", i32 1} | |
!28 = !{void (%struct.float4*, %struct.float4*, %struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32, float)* undef, !"maxntidx", i32 128} | |
!29 = !{void (%struct.float4*, %struct.float4*, %struct.uint4*, i32*, %struct.ushort4*, i32*, i16*, %struct.float4*, i32, float)* undef, !"minctasm", i32 6} | |
!30 = !{void (%struct.float4*, i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"kernel", i32 1} | |
!31 = !{void (%struct.float4*, i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"maxntidx", i32 256} | |
!32 = !{void (%struct.float4*, i32*, i32*, %struct.ushort4*, i32*, i32)* undef, !"minctasm", i32 6} | |
!33 = !{void (%struct.reorder_params*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"kernel", i32 1} | |
!34 = !{void (%struct.reorder_params*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"maxntidx", i32 256} | |
!35 = !{void (%struct.reorder_params*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"minctasm", i32 6} | |
!36 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::device_ptr"*, i64, %"class.thrust::tuple"*, i32*, %struct.ptype_hash_compare*)* undef, !"kernel", i32 1} | |
!37 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::device_ptr"*, i64, %"class.thrust::tuple"*, i32*, %struct.ptype_hash_compare*)* undef, !"maxntidx", i32 256} | |
!38 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f"*, i64)* undef, !"kernel", i32 1} | |
!39 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f"*, i64)* undef, !"maxntidx", i32 256} | |
!40 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.69"*, i64)* undef, !"kernel", i32 1} | |
!41 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.69"*, i64)* undef, !"maxntidx", i32 256} | |
!42 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.72"*, i64)* undef, !"kernel", i32 1} | |
!43 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.72"*, i64)* undef, !"maxntidx", i32 256} | |
!44 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.74"*, i64)* undef, !"kernel", i32 1} | |
!45 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.74"*, i64)* undef, !"maxntidx", i32 256} | |
!46 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::tuple"*, i64, i64, i64*, %struct.ptype_hash_compare*, i64, i32)* undef, !"kernel", i32 1} | |
!47 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::tuple"*, i64, i64, i64*, %struct.ptype_hash_compare*, i64, i32)* undef, !"maxntidx", i32 256} | |
!48 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::device_ptr"*, i64, %"class.thrust::tuple"*, i32*, %struct.ptype_hash_compare*, i64*, i64)* undef, !"kernel", i32 1} | |
!49 = !{void (i1, %"class.thrust::zip_iterator"*, %"class.thrust::device_ptr"*, i64, %"class.thrust::tuple"*, i32*, %struct.ptype_hash_compare*, i64*, i64)* undef, !"maxntidx", i32 256} | |
!50 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.79"*, i64)* undef, !"kernel", i32 1} | |
!51 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.79"*, i64)* undef, !"maxntidx", i32 256} | |
!52 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.81"*, i64)* undef, !"kernel", i32 1} | |
!53 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.81"*, i64)* undef, !"maxntidx", i32 256} | |
!54 = !{void (%struct.buildneibs_params*)* undef, !"kernel", i32 1} | |
!55 = !{void (%struct.buildneibs_params*)* undef, !"maxntidx", i32 256} | |
!56 = !{void (%struct.buildneibs_params*)* undef, !"minctasm", i32 5} | |
!57 = !{void (%struct.euler_params*)* undef, !"kernel", i32 1} | |
!58 = !{void (%struct.euler_params.111*)* undef, !"kernel", i32 1} | |
!59 = !{void (%struct.euler_params.114*)* undef, !"kernel", i32 1} | |
!60 = !{void (%struct.euler_params.117*)* undef, !"kernel", i32 1} | |
!61 = !{void (%"struct.cub::ReduceByKeyScanTileState"*, i64)* undef, !"kernel", i32 1} | |
!62 = !{void (%"struct.cub::ReduceByKeyScanTileState"*, i64)* undef, !"maxntidx", i32 128} | |
!63 = !{void (%"class.thrust::device_ptr.120"*, %"class.thrust::device_ptr.124"*, %"class.thrust::device_ptr.124"*, %"struct.thrust::equal_to"*, %"struct.thrust::plus"*, %"struct.cub::ReduceByKeyScanTileState"*, i32, %"struct.thrust::cuda_cub::__scan_by_key::DoNothing"*)* undef, !"kernel", i32 1} | |
!64 = !{void (%"class.thrust::device_ptr.120"*, %"class.thrust::device_ptr.124"*, %"class.thrust::device_ptr.124"*, %"struct.thrust::equal_to"*, %"struct.thrust::plus"*, %"struct.cub::ReduceByKeyScanTileState"*, i32, %"struct.thrust::cuda_cub::__scan_by_key::DoNothing"*)* undef, !"maxntidx", i32 256} | |
!65 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.166"*, i64)* undef, !"kernel", i32 1} | |
!66 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.166"*, i64)* undef, !"maxntidx", i32 256} | |
!67 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.169"*, i64)* undef, !"kernel", i32 1} | |
!68 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.169"*, i64)* undef, !"maxntidx", i32 256} | |
!69 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.171"*, i64)* undef, !"kernel", i32 1} | |
!70 = !{void (%"struct.thrust::cuda_cub::__transform::unary_transform_f.171"*, i64)* undef, !"maxntidx", i32 256} | |
!71 = !{void (%struct.density_diffusion_params*)* undef, !"kernel", i32 1} | |
!72 = !{void (%struct.forces_params*)* undef, !"kernel", i32 1} | |
!73 = !{void (%struct.forces_params.223*)* undef, !"kernel", i32 1} | |
!74 = !{void (%struct.finalize_forces_params*)* undef, !"kernel", i32 1} | |
!75 = !{void (%struct.forces_params.236*)* undef, !"kernel", i32 1} | |
!76 = !{void (%struct.forces_params.250*)* undef, !"kernel", i32 1} | |
!77 = !{void (%struct.forces_params.256*)* undef, !"kernel", i32 1} | |
!78 = !{void (%struct.finalize_forces_params.262*)* undef, !"kernel", i32 1} | |
!79 = !{void (%struct.neibs_interaction_params*, %struct.float4*)* undef, !"kernel", i32 1} | |
!80 = !{void (%struct.neibs_interaction_params*, %struct.float4*)* undef, !"maxntidx", i32 128} | |
!81 = !{void (%struct.neibs_interaction_params*, %struct.float4*)* undef, !"minctasm", i32 6} | |
!82 = !{void (%struct.neibs_interaction_params*, %struct.float3*)* undef, !"kernel", i32 1} | |
!83 = !{void (%"struct.cupostprocess::testpoints_params"*)* undef, !"kernel", i32 1} | |
!84 = !{void (%struct.neibs_interaction_params*, %struct.float4*, %struct.ushort4*)* undef, !"kernel", i32 1} | |
!85 = !{void (%struct.neibs_interaction_params*, %struct.float4*, %struct.ushort4*, float)* undef, !"kernel", i32 1} | |
!86 = !{void (%struct.reorder_params.278*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"kernel", i32 1} | |
!87 = !{void (%struct.reorder_params.278*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"maxntidx", i32 256} | |
!88 = !{void (%struct.reorder_params.278*, i32*, i32*, i32*, %struct.ushort4*, i32*, i32*, i32, i32*)* undef, !"minctasm", i32 6} | |
!89 = !{void (%struct.buildneibs_params.279*)* undef, !"kernel", i32 1} | |
!90 = !{void (%struct.buildneibs_params.279*)* undef, !"maxntidx", i32 256} | |
!91 = !{void (%struct.buildneibs_params.279*)* undef, !"minctasm", i32 5} | |
!92 = !{void (%struct.euler_params.282*)* undef, !"kernel", i32 1} | |
!93 = !{void (%struct.euler_params.285*)* undef, !"kernel", i32 1} | |
!94 = !{void (%struct.euler_params.288*)* undef, !"kernel", i32 1} | |
!95 = !{void (%struct.euler_params.291*)* undef, !"kernel", i32 1} | |
!96 = !{void (%struct.forces_params.294*)* undef, !"kernel", i32 1} | |
!97 = !{void (%struct.forces_params.300*)* undef, !"kernel", i32 1} | |
!98 = !{void (%struct.finalize_forces_params.305*)* undef, !"kernel", i32 1} | |
!99 = !{void (%struct.forces_params.312*)* undef, !"kernel", i32 1} | |
!100 = !{void (%struct.forces_params.319*)* undef, !"kernel", i32 1} | |
!101 = !{void (%struct.forces_params.325*)* undef, !"kernel", i32 1} | |
!102 = !{void (%struct.finalize_forces_params.331*)* undef, !"kernel", i32 1} | |
!103 = !{void (%"struct.cupostprocess::testpoints_params.336"*)* undef, !"kernel", i32 1} | |
!104 = !{void (%struct.density_diffusion_params.339*)* undef, !"kernel", i32 1} | |
!105 = !{void (%struct.forces_params.341*)* undef, !"kernel", i32 1} | |
!106 = !{void (%struct.forces_params.346*)* undef, !"kernel", i32 1} | |
!107 = !{void (%struct.forces_params.351*)* undef, !"kernel", i32 1} | |
!108 = !{void (%struct.forces_params.356*)* undef, !"kernel", i32 1} | |
!109 = !{void (%struct.forces_params.361*)* undef, !"kernel", i32 1} | |
!110 = !{void (%struct.forces_params.366*)* undef, !"kernel", i32 1} | |
!111 = !{void (%struct.density_diffusion_params.371*)* undef, !"kernel", i32 1} | |
!112 = !{void (%struct.forces_params.373*)* undef, !"kernel", i32 1} | |
!113 = !{void (%struct.forces_params.378*)* undef, !"kernel", i32 1} | |
!114 = !{void (%struct.forces_params.383*)* undef, !"kernel", i32 1} | |
!115 = !{void (%struct.forces_params.388*)* undef, !"kernel", i32 1} | |
!116 = !{void (%struct.forces_params.393*)* undef, !"kernel", i32 1} | |
!117 = !{void (%struct.forces_params.398*)* undef, !"kernel", i32 1} | |
!118 = !{void (%struct.density_diffusion_params.403*)* undef, !"kernel", i32 1} | |
!119 = !{void (%struct.forces_params.405*)* undef, !"kernel", i32 1} | |
!120 = !{void (%struct.forces_params.410*)* undef, !"kernel", i32 1} | |
; Annotation for the forcesDevice function defined in this module: property
; "kernel" with value 1. Among the annotation nodes !0-!124 this is the only one
; whose first operand is an actual function; the others hold `undef` there.
!121 = !{void (%struct.forces_params.415*)* @_ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_, !"kernel", i32 1} | |
!122 = !{void (%struct.forces_params.420*)* undef, !"kernel", i32 1} | |
!123 = !{void (%struct.forces_params.425*)* undef, !"kernel", i32 1} | |
!124 = !{void (%struct.forces_params.430*)* undef, !"kernel", i32 1} | |
; ---- TBAA (type-based alias analysis) metadata ----
; Access tags have the form !{base type, access type, byte offset}.
; Scalar type nodes have the form !{name, parent, offset}.
; Struct type nodes have the form !{name, member type, member offset, ...}.

; Access tag for vtable-pointer accesses (used by the vtable store in forcesDevice).
!125 = !{!126, !126, i64 0} | |
!126 = !{!"vtable pointer", !127, i64 0} | |
; Root of the TBAA type tree.
!127 = !{!"Simple C++ TBAA"} | |
; Access tag for scalar "long" accesses (used by the loads of d_neiblist_stride).
!128 = !{!129, !129, i64 0} | |
!129 = !{!"long", !130, i64 0} | |
!130 = !{!"omnipotent char", !127, i64 0} | |
; Access tag: a "long" member at byte offset 64 inside
; cuneibs::neiblist_iterator_core (!132).
!131 = !{!132, !129, i64 64} | |
; Struct descriptor for cuneibs::neiblist_iterator_core, as (member type, offset)
; pairs: pointer@0, pointer@8, float4@16, int3@32, int@44, float3@48, long@64,
; int@72, char@76, int@80.
!132 = !{!"_ZTSN7cuneibs22neiblist_iterator_coreE", !133, i64 0, !133, i64 8, !134, i64 16, !136, i64 32, !137, i64 44, !138, i64 48, !129, i64 64, !137, i64 72, !130, i64 76, !137, i64 80} | |
!133 = !{!"any pointer", !130, i64 0} | |
; float4: four floats at offsets 0, 4, 8, 12.
!134 = !{!"_ZTS6float4", !135, i64 0, !135, i64 4, !135, i64 8, !135, i64 12} | |
!135 = !{!"float", !130, i64 0} | |
; int3: three ints at offsets 0, 4, 8.
!136 = !{!"_ZTS4int3", !137, i64 0, !137, i64 4, !137, i64 8} | |
!137 = !{!"int", !130, i64 0} | |
; float3: three floats at offsets 0, 4, 8.
!138 = !{!"_ZTS6float3", !135, i64 0, !135, i64 4, !135, i64 8} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
SROA debug log: