Skip to content

Instantly share code, notes, and snippets.

View Artem-B's full-sized avatar
-

Artem Belevich Artem-B

-
View GitHub Profile
;*** IR Dump After Combine redundant instructions *** (function: _ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_)
; ModuleID = 'reduced.ll.ll'
source_filename = "<stdin>"
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
%struct.char3 = type { i8, i8, i8 }
%"class.cuneibs::neiblist_iterator_core" = type <{ i32*, i16*, %struct.float
;*** IR Dump After Straight line strength reduction (slsr) *** (function: _ZN8cuforces12forcesDeviceI13forces_paramsIL10KernelType3EL14SPHFormulation1EL20DensityDiffusionType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm517ELb0EELm517EL12ParticleType1ELSD_0EL7RunMode1ELb0ELb0ELb0ELb0E5emptyI18xsph_forces_paramsESF_I20volume_forces_paramsESF_I21grenier_forces_paramsESF_I25sa_boundary_forces_paramsESF_I28dummy_boundary_forces_paramsESF_I25water_depth_forces_paramsESF_I18keps_forces_paramsESF_I14tau_tex_paramsESF_I22eulerVel_forces_paramsESF_I29internal_energy_forces_paramsESF_I28effective_visc_forces_paramsEELS2_3ELS3_1ELS4_3ELS5_4ESC_Lm517ELSD_1ELSD_0EEEvT_)
; ModuleID = 'reduced.ll.ll'
source_filename = "<stdin>"
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
%struct.char3 = type { i8, i8, i8 }
%struct.float4 = type { float, float, float, float }
%struct.float3
; Compile with:
;
; clang "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-pc-linux-gnu"
; "-S" "-disable-llvm-verifier" "-discard-value-names" "-main-file-name"
; "DamBreak3D.cu" "-mrelocation-model" "static" "-mframe-pointer=all"
; "-fno-rounding-math" "-fno-verbose-asm" "-no-integrated-as" "-aux-target-cpu"
; "x86-64" "-fcuda-is-device"
; "-target-feature" "+ptx70" "-target-sdk-version=11.0" "-target-cpu" "sm_70"
; "-O3" "-x" "ir"
--- bin/res.clang 2021-10-05 16:31:10.824553505 -0700
+++ bin/res.clang11 2021-10-05 16:56:46.800860833 -0700
@@ -11,39 +11,39 @@
Common:
GLOBAL:402 CONSTANT[3]:2844
Function _ZN13cupostprocess14calcVortDeviceIL10KernelType3EL12BoundaryType4EEEv24neibs_interaction_paramsIXT0_ENSt11conditionalIXcvbeqT0_LS2_2EE18sa_boundary_params5emptyIS5_EE4typeEEP6float3:
- REG:47 STACK:112 SHARED:0 LOCAL:0 CONSTANT[0]:424 CONSTANT[2]:16 TEXTURE:0 SURFACE:0 SAMPLER:0
+ REG:46 STACK:0 SHARED:0 LOCAL:0 CONSTANT[0]:424 CONSTANT[2]:16 TEXTURE:0 SURFACE:0 SAMPLER:0
Function _ZN13cupostprocess20calcTestpointsDeviceIL10KernelType3EL12BoundaryType4E12FullViscSpecIL12RheologyType0EL15TurbulenceModel1EL26ComputationalViscosityType0EL12ViscousModel0EL15AverageOperator0ELm513ELb0EEEEvNS_17testpoints_paramsIXT0_ET1_XeqsrSB_9turbmodelLS5_3EE24neibs_interaction_paramsIXT0_ENSt11conditionalIXcvbeqT0_LS2_2EE18sa_boundary_params5emptyISE_EE4typeEENSD_IXcvbeqsrSB_9turbmodelLS5_3EE15keps_tex_paramsSF_ISK_EE4typeENSD_IXcvbeqsrSB_9turb
--- bin/res.clang 2021-10-05 16:31:10.824553505 -0700
+++ bin/res.nvcc 2021-10-05 16:31:00.712472386 -0700
@@ -9,219 +9,232 @@
Resource usage:
Common:
- GLOBAL:402 CONSTANT[3]:2844
+ GLOBAL:0
+
+Fatbin elf code:
namespace {
template <int N>
struct __Tag;
# 54 "__clang_cuda_texture_intrinsics.h" 3
template <class>
struct __FT;
template <>
; ModuleID = 'repro.ll'
source_filename = "blaze-out/k8-opt/genfiles/third_party/amdgpu/rocFFT/kernels/kernels_273.cpp"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
%struct.ham = type { %struct.zot }
%struct.zot = type { %struct.quux }
%struct.quux = type { <2 x double> }
$_ZZ22fft_fwd_ip_len100_sbccI15HIP_vector_typeIdLj2EEL9StrideBin0ELb1EEvPKT_S5_mPKmS7_mPS3_E3lds = comdat any
; ModuleID = '/usr/local/google/home/tra/work/llvm/repo/clang/test/CodeGenCUDA/unused-global-var.cu'
source_filename = "/usr/local/google/home/tra/work/llvm/repo/clang/test/CodeGenCUDA/unused-global-var.cu"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn-amd-amdhsa"
@v1 = dso_local local_unnamed_addr addrspace(1) externally_initialized global i32 0, align 4
@v2 = dso_local local_unnamed_addr addrspace(4) externally_initialized global i32 0, align 4
@v4.managed = dso_local addrspace(1) externally_initialized global i32 0, align 4
@u1 = dso_local local_unnamed_addr addrspace(1) externally_initialized global i32 0, align 4
@u2 = dso_local local_unnamed_addr addrspace(4) externally_initialized global i32 0, align 4