Created
June 13, 2019 22:19
-
-
Save jdoerfert/4376a251d98171326d625f2fb67b5259 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; __CLANG_OFFLOAD_BUNDLE____START__ openmp-nvptx64-nvida-cuda | |
; Module header: names the originating source file and fixes the data layout
; string and target triple that the rest of this module is expressed against.
; NOTE(review): the triple's vendor field is spelled "nvida" here (and in the
; offload-bundle marker line just above); presumably "nvidia" was intended.
; Confirm against the producing toolchain before changing it, since tools may
; match on this exact string.
; ModuleID = 'test.c' | |
source_filename = "test.c" | |
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" | |
target triple = "nvptx64-nvida-cuda" | |
; Named struct types. The kernel below addresses these purely by field index in
; its getelementptr instructions, so the order of the fields listed here is
; significant.
;
; ThreadPrivateContext: one TeamDescr, then several 1024-element arrays
; (TaskDescr values, TaskDescr pointers, a union wrapping 1024 x i16, one i32
; array and four i64 arrays), followed by a single i64 and 8 trailing bytes.
%class.omptarget_nvptx_ThreadPrivateContext = type { %class.omptarget_nvptx_TeamDescr, [1024 x %class.omptarget_nvptx_TaskDescr], [1024 x %class.omptarget_nvptx_TaskDescr*], %union.anon, [1024 x i32], [1024 x i64], [1024 x i64], [1024 x i64], [1024 x i64], i64, [8 x i8] } | |
; TeamDescr: a TaskDescr, a WorkDescr, an i64, 8 bytes, then 32 worker slots
; and 1 master slot.
%class.omptarget_nvptx_TeamDescr = type { %class.omptarget_nvptx_TaskDescr, %class.omptarget_nvptx_WorkDescr, i64, [8 x i8], [32 x %struct.__kmpc_data_sharing_worker_slot_static], [1 x %struct.__kmpc_data_sharing_master_slot_static] } | |
; TaskDescr: two nested item structs followed by a pointer to another TaskDescr.
%class.omptarget_nvptx_TaskDescr = type { %"struct.omptarget_nvptx_TaskDescr::SavedLoopDescr_items", %"struct.omptarget_nvptx_TaskDescr::TaskDescr_items", %class.omptarget_nvptx_TaskDescr* } | |
%"struct.omptarget_nvptx_TaskDescr::SavedLoopDescr_items" = type { i64, i64, i64, i64, i32 } | |
%"struct.omptarget_nvptx_TaskDescr::TaskDescr_items" = type { i8, i8, i16, i64 } | |
; WorkDescr wraps exactly one TaskDescr.
%class.omptarget_nvptx_WorkDescr = type { %class.omptarget_nvptx_TaskDescr } | |
; The three data-sharing slot types share the same four leading fields (two
; slot pointers and two i8 pointers) and differ only in the trailing byte
; array: 8192 bytes for the worker slot, 0 for the generic slot, 256 for the
; master slot.
%struct.__kmpc_data_sharing_worker_slot_static = type { %struct.__kmpc_data_sharing_slot*, %struct.__kmpc_data_sharing_slot*, i8*, i8*, [8192 x i8] } | |
%struct.__kmpc_data_sharing_slot = type { %struct.__kmpc_data_sharing_slot*, %struct.__kmpc_data_sharing_slot*, i8*, i8*, [0 x i8] } | |
%struct.__kmpc_data_sharing_master_slot_static = type { %struct.__kmpc_data_sharing_slot*, %struct.__kmpc_data_sharing_slot*, i8*, i8*, [256 x i8] } | |
%union.anon = type { [1024 x i16] } | |
; DataSharingStateTy: four parallel 32-element arrays (slot pointers, two
; arrays of i8 pointers, and i32 values).
%struct.DataSharingStateTy = type { [32 x %struct.__kmpc_data_sharing_slot*], [32 x i8*], [32 x i8*], [32 x i32] } | |
; Queue: 32 inline ThreadPrivateContext elements, 32 pointers to such elements,
; an i32, a 32-element i32 array, another i32, and 8 trailing bytes.
%class.omptarget_nvptx_Queue = type { [32 x %class.omptarget_nvptx_ThreadPrivateContext], [32 x %class.omptarget_nvptx_ThreadPrivateContext*], i32, [32 x i32], i32, [8 x i8] } | |
; Global variables. Per the LLVM NVPTX usage guide, addrspace(1) is the global
; address space and addrspace(3) the shared address space on this target.
;
; Defined in this module: a zero-initialized 32 x i32 scratch array with
; `common` linkage in addrspace(3), and a weak i8 constant with value 0 whose
; name pairs it with the kernel @__omp_offloading_10_1012dd1_main_l7.
@__openmp_nvptx_data_transfer_temporary_storage = common addrspace(3) global [32 x i32] zeroinitializer | |
@__omp_offloading_10_1012dd1_main_l7_exec_mode = weak constant i8 0 | |
; Declared here but defined elsewhere (`external`): the kernel reaches these
; through addrspacecast constant expressions or address-space-qualified GEPs.
@omptarget_nvptx_threadPrivateContext = external dso_local local_unnamed_addr addrspace(3) global %class.omptarget_nvptx_ThreadPrivateContext*, align 8 | |
@DataSharingState = external dso_local local_unnamed_addr addrspace(3) global %struct.DataSharingStateTy, align 8 | |
@parallelLevel = external dso_local local_unnamed_addr addrspace(3) global [32 x i8], align 1 | |
@execution_param = external dso_local local_unnamed_addr addrspace(3) global i32, align 4 | |
@usedSlotIdx = external dso_local local_unnamed_addr addrspace(3) global i32, align 4 | |
; The only addrspace(1) global. Its element count (84) matches the modulus the
; kernel applies to the %smid register value before indexing this array.
@omptarget_nvptx_device_State = external dso_local addrspace(1) global [84 x %class.omptarget_nvptx_Queue], align 16 | |
; Lists the two locally defined globals in @llvm.compiler.used so the compiler
; does not discard them even though nothing else in the module references them;
; the addrspace(3) array is cast to a generic i8* to fit the array element type.
@llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_10_1012dd1_main_l7_exec_mode, i8* addrspacecast (i8 addrspace(3)* bitcast ([32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage to i8 addrspace(3)*) to i8*)], section "llvm.metadata" | |
; Function Attrs: norecurse nounwind | |
define weak void @__omp_offloading_10_1012dd1_main_l7(i32* dereferenceable(4) %Count) local_unnamed_addr #0 { | |
entry: | |
store i32 1, i32* addrspacecast (i32 addrspace(3)* @execution_param to i32*), align 4, !tbaa !13 | |
%0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #5, !range !17 | |
%cmp.i = icmp eq i32 %0, 0 | |
br i1 %cmp.i, label %if.then19.i, label %if.else.i | |
if.else.i: ; preds = %entry | |
%and.i.i = and i32 %0, 31 | |
%cmp6.i = icmp eq i32 %and.i.i, 0 | |
br i1 %cmp6.i, label %if.then7.i, label %__kmpc_spmd_kernel_init.exit | |
if.then7.i: ; preds = %if.else.i | |
%div.i.i = lshr i32 %0, 5 | |
%idxprom.i = zext i32 %div.i.i to i64 | |
%arrayidx64.i = getelementptr inbounds [32 x i8], [32 x i8] addrspace(3)* @parallelLevel, i64 0, i64 %idxprom.i | |
%arrayidx.i = addrspacecast i8 addrspace(3)* %arrayidx64.i to i8* | |
%1 = tail call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #5, !range !18 | |
%cmp9.i = icmp ugt i32 %1, 1 | |
%add11.i = select i1 %cmp9.i, i8 -127, i8 1 | |
store i8 %add11.i, i8* %arrayidx.i, align 1, !tbaa !19 | |
br label %__kmpc_spmd_kernel_init.exit | |
if.then19.i: ; preds = %entry | |
%2 = tail call i32 asm "mov.u32 $0, %smid;", "=r"() #6, !srcloc !20 | |
%rem.i = urem i32 %2, 84 | |
store i32 %rem.i, i32* addrspacecast (i32 addrspace(3)* @usedSlotIdx to i32*), align 4, !tbaa !13 | |
%3 = tail call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #5, !range !18 | |
%cmp3.i = icmp ugt i32 %3, 1 | |
%add.i = select i1 %cmp3.i, i8 -127, i8 1 | |
store i8 %add.i, i8* getelementptr inbounds ([32 x i8], [32 x i8]* addrspacecast ([32 x i8] addrspace(3)* @parallelLevel to [32 x i8]*), i64 0, i64 0), align 1, !tbaa !19 | |
%idxprom20.i = zext i32 %rem.i to i64 | |
%arrayidx2163.i = getelementptr inbounds [84 x %class.omptarget_nvptx_Queue], [84 x %class.omptarget_nvptx_Queue] addrspace(1)* @omptarget_nvptx_device_State, i64 0, i64 %idxprom20.i | |
%arrayidx21.i = addrspacecast %class.omptarget_nvptx_Queue addrspace(1)* %arrayidx2163.i to %class.omptarget_nvptx_Queue* | |
%head.i.i.i = getelementptr inbounds %class.omptarget_nvptx_Queue, %class.omptarget_nvptx_Queue* %arrayidx21.i, i64 0, i32 2 | |
%4 = atomicrmw add i32* %head.i.i.i, i32 1 seq_cst | |
%rem.i.i = and i32 %4, 31 | |
%div.i.i.i = lshr i32 %4, 5 | |
%mul.i.i.i = shl nuw nsw i32 %div.i.i.i, 1 | |
%idxprom.i3.i.i = zext i32 %rem.i.i to i64 | |
%arrayidx.i4.i.i = getelementptr inbounds %class.omptarget_nvptx_Queue, %class.omptarget_nvptx_Queue* %arrayidx21.i, i64 0, i32 3, i64 %idxprom.i3.i.i | |
br label %while.cond.i.i | |
while.cond.i.i: ; preds = %while.cond.i.i, %if.then19.i | |
%5 = atomicrmw or i32* %arrayidx.i4.i.i, i32 0 seq_cst | |
%cmp.i.i.i = icmp eq i32 %5, %mul.i.i.i | |
br i1 %cmp.i.i.i, label %_ZN21omptarget_nvptx_QueueI36omptarget_nvptx_ThreadPrivateContextLj32EE7DequeueEv.exit.i, label %while.cond.i.i | |
_ZN21omptarget_nvptx_QueueI36omptarget_nvptx_ThreadPrivateContextLj32EE7DequeueEv.exit.i: ; preds = %while.cond.i.i | |
%arrayidx.i2.i.i = getelementptr inbounds %class.omptarget_nvptx_Queue, %class.omptarget_nvptx_Queue* %arrayidx21.i, i64 0, i32 1, i64 %idxprom.i3.i.i | |
%6 = bitcast %class.omptarget_nvptx_ThreadPrivateContext** %arrayidx.i2.i.i to i64* | |
%7 = atomicrmw or i64* %6, i64 0 seq_cst | |
%8 = inttoptr i64 %7 to %class.omptarget_nvptx_ThreadPrivateContext* | |
%cmp.i.i = icmp eq i64 %7, 0 | |
%arrayidx.i4.i = getelementptr inbounds %class.omptarget_nvptx_Queue, %class.omptarget_nvptx_Queue* %arrayidx21.i, i64 0, i32 0, i64 %idxprom.i3.i.i | |
%element.0.i.i = select i1 %cmp.i.i, %class.omptarget_nvptx_ThreadPrivateContext* %arrayidx.i4.i, %class.omptarget_nvptx_ThreadPrivateContext* %8 | |
%add.i.i.i = and i32 %mul.i.i.i, 33554430 | |
%rem.i.i.i = or i32 %add.i.i.i, 1 | |
%9 = atomicrmw xchg i32* %arrayidx.i4.i.i, i32 %rem.i.i.i seq_cst | |
store %class.omptarget_nvptx_ThreadPrivateContext* %element.0.i.i, %class.omptarget_nvptx_ThreadPrivateContext** addrspacecast (%class.omptarget_nvptx_ThreadPrivateContext* addrspace(3)* @omptarget_nvptx_threadPrivateContext to %class.omptarget_nvptx_ThreadPrivateContext**), align 8, !tbaa !21 | |
%flags.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %element.0.i.i, i64 0, i32 0, i32 0, i32 1, i32 0 | |
store i8 0, i8* %flags.i.i.i, align 8, !tbaa !23 | |
%threadId.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %element.0.i.i, i64 0, i32 0, i32 0, i32 1, i32 2 | |
store i16 0, i16* %threadId.i.i.i, align 2, !tbaa !30 | |
%runtimeChunkSize.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %element.0.i.i, i64 0, i32 0, i32 0, i32 1, i32 3 | |
store i64 1, i64* %runtimeChunkSize.i.i.i, align 8, !tbaa !31 | |
br label %__kmpc_spmd_kernel_init.exit | |
__kmpc_spmd_kernel_init.exit: ; preds = %if.else.i, %if.then7.i, %_ZN21omptarget_nvptx_QueueI36omptarget_nvptx_ThreadPrivateContextLj32EE7DequeueEv.exit.i | |
tail call void asm sideeffect "bar.sync $0;", "r,~{memory}"(i32 0) #7, !srcloc !32 | |
%10 = load %class.omptarget_nvptx_ThreadPrivateContext*, %class.omptarget_nvptx_ThreadPrivateContext** addrspacecast (%class.omptarget_nvptx_ThreadPrivateContext* addrspace(3)* @omptarget_nvptx_threadPrivateContext to %class.omptarget_nvptx_ThreadPrivateContext**), align 8, !tbaa !21 | |
%idxprom.i1.i = zext i32 %0 to i64 | |
%arrayidx.i2.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %10, i64 0, i32 1, i64 %idxprom.i1.i | |
%levelZeroTaskDescr.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %10, i64 0, i32 0, i32 0 | |
%flags.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %10, i64 0, i32 1, i64 %idxprom.i1.i, i32 1, i32 0 | |
store i8 48, i8* %flags.i.i, align 8, !tbaa !23 | |
%conv.i.i = trunc i32 %0 to i16 | |
%threadId.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %10, i64 0, i32 1, i64 %idxprom.i1.i, i32 1, i32 2 | |
store i16 %conv.i.i, i16* %threadId.i.i, align 2, !tbaa !30 | |
%runtimeChunkSize.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %10, i64 0, i32 1, i64 %idxprom.i1.i, i32 1, i32 3 | |
store i64 1, i64* %runtimeChunkSize.i.i, align 8, !tbaa !31 | |
%prev.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %10, i64 0, i32 1, i64 %idxprom.i1.i, i32 2 | |
store %class.omptarget_nvptx_TaskDescr* %levelZeroTaskDescr.i.i, %class.omptarget_nvptx_TaskDescr** %prev.i.i, align 8, !tbaa !33 | |
%11 = load %class.omptarget_nvptx_ThreadPrivateContext*, %class.omptarget_nvptx_ThreadPrivateContext** addrspacecast (%class.omptarget_nvptx_ThreadPrivateContext* addrspace(3)* @omptarget_nvptx_threadPrivateContext to %class.omptarget_nvptx_ThreadPrivateContext**), align 8, !tbaa !21 | |
%arrayidx.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %11, i64 0, i32 2, i64 %idxprom.i1.i | |
store %class.omptarget_nvptx_TaskDescr* %arrayidx.i2.i, %class.omptarget_nvptx_TaskDescr** %arrayidx.i.i, align 8, !tbaa !21 | |
br i1 %cmp.i, label %if.then.i, label %__kmpc_data_sharing_init_stack_spmd.exit | |
if.then.i: ; preds = %__kmpc_spmd_kernel_init.exit | |
%12 = load %class.omptarget_nvptx_ThreadPrivateContext*, %class.omptarget_nvptx_ThreadPrivateContext** addrspacecast (%class.omptarget_nvptx_ThreadPrivateContext* addrspace(3)* @omptarget_nvptx_threadPrivateContext to %class.omptarget_nvptx_ThreadPrivateContext**), align 8, !tbaa !21 | |
br label %for.body.i.i | |
for.body.i.i: ; preds = %for.body.i.i, %if.then.i | |
%WID.013.i.i = phi i32 [ 0, %if.then.i ], [ %inc.i.i.3, %for.body.i.i ] | |
%idxprom.i.i.i = zext i32 %WID.013.i.i to i64 | |
%arrayidx.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i | |
%add.ptr.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i, i32 4, i64 8192 | |
%DataEnd.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i, i32 3 | |
store i8* %add.ptr.i.i.i, i8** %DataEnd.i.i.i, align 8, !tbaa !34 | |
%Next.i.i.i = getelementptr inbounds %struct.__kmpc_data_sharing_worker_slot_static, %struct.__kmpc_data_sharing_worker_slot_static* %arrayidx.i.i.i, i64 0, i32 0 | |
store %struct.__kmpc_data_sharing_slot* null, %struct.__kmpc_data_sharing_slot** %Next.i.i.i, align 16, !tbaa !36 | |
%Prev.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i, i32 1 | |
store %struct.__kmpc_data_sharing_slot* null, %struct.__kmpc_data_sharing_slot** %Prev.i.i.i, align 8, !tbaa !37 | |
%PrevSlotStackPtr.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i, i32 2 | |
store i8* null, i8** %PrevSlotStackPtr.i.i.i, align 16, !tbaa !38 | |
%arrayidx.i.i2 = getelementptr %struct.DataSharingStateTy, %struct.DataSharingStateTy* addrspacecast (%struct.DataSharingStateTy addrspace(3)* @DataSharingState to %struct.DataSharingStateTy*), i64 0, i32 0, i64 %idxprom.i.i.i | |
%13 = bitcast %struct.__kmpc_data_sharing_slot** %arrayidx.i.i2 to %struct.__kmpc_data_sharing_worker_slot_static** | |
store %struct.__kmpc_data_sharing_worker_slot_static* %arrayidx.i.i.i, %struct.__kmpc_data_sharing_worker_slot_static** %13, align 8, !tbaa !21 | |
%14 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i, i32 4, i64 0 | |
%arrayidx4.i.i = getelementptr %struct.DataSharingStateTy, %struct.DataSharingStateTy* addrspacecast (%struct.DataSharingStateTy addrspace(3)* @DataSharingState to %struct.DataSharingStateTy*), i64 0, i32 1, i64 %idxprom.i.i.i | |
store i8* %14, i8** %arrayidx4.i.i, align 8, !tbaa !21 | |
%inc.i.i = or i32 %WID.013.i.i, 1 | |
%idxprom.i.i.i.1 = zext i32 %inc.i.i to i64 | |
%arrayidx.i.i.i.1 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.1 | |
%add.ptr.i.i.i.1 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.1, i32 4, i64 8192 | |
%DataEnd.i.i.i.1 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.1, i32 3 | |
store i8* %add.ptr.i.i.i.1, i8** %DataEnd.i.i.i.1, align 8, !tbaa !34 | |
%Next.i.i.i.1 = getelementptr inbounds %struct.__kmpc_data_sharing_worker_slot_static, %struct.__kmpc_data_sharing_worker_slot_static* %arrayidx.i.i.i.1, i64 0, i32 0 | |
store %struct.__kmpc_data_sharing_slot* null, %struct.__kmpc_data_sharing_slot** %Next.i.i.i.1, align 16, !tbaa !36 | |
%Prev.i.i.i.1 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.1, i32 1 | |
store %struct.__kmpc_data_sharing_slot* null, %struct.__kmpc_data_sharing_slot** %Prev.i.i.i.1, align 8, !tbaa !37 | |
%PrevSlotStackPtr.i.i.i.1 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.1, i32 2 | |
store i8* null, i8** %PrevSlotStackPtr.i.i.i.1, align 16, !tbaa !38 | |
%arrayidx.i.i2.1 = getelementptr %struct.DataSharingStateTy, %struct.DataSharingStateTy* addrspacecast (%struct.DataSharingStateTy addrspace(3)* @DataSharingState to %struct.DataSharingStateTy*), i64 0, i32 0, i64 %idxprom.i.i.i.1 | |
%15 = bitcast %struct.__kmpc_data_sharing_slot** %arrayidx.i.i2.1 to %struct.__kmpc_data_sharing_worker_slot_static** | |
store %struct.__kmpc_data_sharing_worker_slot_static* %arrayidx.i.i.i.1, %struct.__kmpc_data_sharing_worker_slot_static** %15, align 8, !tbaa !21 | |
%16 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.1, i32 4, i64 0 | |
%arrayidx4.i.i.1 = getelementptr %struct.DataSharingStateTy, %struct.DataSharingStateTy* addrspacecast (%struct.DataSharingStateTy addrspace(3)* @DataSharingState to %struct.DataSharingStateTy*), i64 0, i32 1, i64 %idxprom.i.i.i.1 | |
store i8* %16, i8** %arrayidx4.i.i.1, align 8, !tbaa !21 | |
%inc.i.i.1 = or i32 %WID.013.i.i, 2 | |
%idxprom.i.i.i.2 = zext i32 %inc.i.i.1 to i64 | |
%arrayidx.i.i.i.2 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.2 | |
%add.ptr.i.i.i.2 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.2, i32 4, i64 8192 | |
%DataEnd.i.i.i.2 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.2, i32 3 | |
store i8* %add.ptr.i.i.i.2, i8** %DataEnd.i.i.i.2, align 8, !tbaa !34 | |
%Next.i.i.i.2 = getelementptr inbounds %struct.__kmpc_data_sharing_worker_slot_static, %struct.__kmpc_data_sharing_worker_slot_static* %arrayidx.i.i.i.2, i64 0, i32 0 | |
store %struct.__kmpc_data_sharing_slot* null, %struct.__kmpc_data_sharing_slot** %Next.i.i.i.2, align 16, !tbaa !36 | |
%Prev.i.i.i.2 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.2, i32 1 | |
store %struct.__kmpc_data_sharing_slot* null, %struct.__kmpc_data_sharing_slot** %Prev.i.i.i.2, align 8, !tbaa !37 | |
%PrevSlotStackPtr.i.i.i.2 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.2, i32 2 | |
store i8* null, i8** %PrevSlotStackPtr.i.i.i.2, align 16, !tbaa !38 | |
%arrayidx.i.i2.2 = getelementptr %struct.DataSharingStateTy, %struct.DataSharingStateTy* addrspacecast (%struct.DataSharingStateTy addrspace(3)* @DataSharingState to %struct.DataSharingStateTy*), i64 0, i32 0, i64 %idxprom.i.i.i.2 | |
%17 = bitcast %struct.__kmpc_data_sharing_slot** %arrayidx.i.i2.2 to %struct.__kmpc_data_sharing_worker_slot_static** | |
store %struct.__kmpc_data_sharing_worker_slot_static* %arrayidx.i.i.i.2, %struct.__kmpc_data_sharing_worker_slot_static** %17, align 8, !tbaa !21 | |
%18 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.2, i32 4, i64 0 | |
%arrayidx4.i.i.2 = getelementptr %struct.DataSharingStateTy, %struct.DataSharingStateTy* addrspacecast (%struct.DataSharingStateTy addrspace(3)* @DataSharingState to %struct.DataSharingStateTy*), i64 0, i32 1, i64 %idxprom.i.i.i.2 | |
store i8* %18, i8** %arrayidx4.i.i.2, align 8, !tbaa !21 | |
%inc.i.i.2 = or i32 %WID.013.i.i, 3 | |
%idxprom.i.i.i.3 = zext i32 %inc.i.i.2 to i64 | |
%arrayidx.i.i.i.3 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.3 | |
%add.ptr.i.i.i.3 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.3, i32 4, i64 8192 | |
%DataEnd.i.i.i.3 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.3, i32 3 | |
store i8* %add.ptr.i.i.i.3, i8** %DataEnd.i.i.i.3, align 8, !tbaa !34 | |
%Next.i.i.i.3 = getelementptr inbounds %struct.__kmpc_data_sharing_worker_slot_static, %struct.__kmpc_data_sharing_worker_slot_static* %arrayidx.i.i.i.3, i64 0, i32 0 | |
store %struct.__kmpc_data_sharing_slot* null, %struct.__kmpc_data_sharing_slot** %Next.i.i.i.3, align 16, !tbaa !36 | |
%Prev.i.i.i.3 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.3, i32 1 | |
store %struct.__kmpc_data_sharing_slot* null, %struct.__kmpc_data_sharing_slot** %Prev.i.i.i.3, align 8, !tbaa !37 | |
%PrevSlotStackPtr.i.i.i.3 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.3, i32 2 | |
store i8* null, i8** %PrevSlotStackPtr.i.i.i.3, align 16, !tbaa !38 | |
%arrayidx.i.i2.3 = getelementptr %struct.DataSharingStateTy, %struct.DataSharingStateTy* addrspacecast (%struct.DataSharingStateTy addrspace(3)* @DataSharingState to %struct.DataSharingStateTy*), i64 0, i32 0, i64 %idxprom.i.i.i.3 | |
%19 = bitcast %struct.__kmpc_data_sharing_slot** %arrayidx.i.i2.3 to %struct.__kmpc_data_sharing_worker_slot_static** | |
store %struct.__kmpc_data_sharing_worker_slot_static* %arrayidx.i.i.i.3, %struct.__kmpc_data_sharing_worker_slot_static** %19, align 8, !tbaa !21 | |
%20 = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %12, i64 0, i32 0, i32 4, i64 %idxprom.i.i.i.3, i32 4, i64 0 | |
%arrayidx4.i.i.3 = getelementptr %struct.DataSharingStateTy, %struct.DataSharingStateTy* addrspacecast (%struct.DataSharingStateTy addrspace(3)* @DataSharingState to %struct.DataSharingStateTy*), i64 0, i32 1, i64 %idxprom.i.i.i.3 | |
store i8* %20, i8** %arrayidx4.i.i.3, align 8, !tbaa !21 | |
%inc.i.i.3 = add nuw nsw i32 %WID.013.i.i, 4 | |
%exitcond.i.i.3 = icmp eq i32 %inc.i.i.3, 32 | |
br i1 %exitcond.i.i.3, label %__kmpc_data_sharing_init_stack_spmd.exit, label %for.body.i.i | |
__kmpc_data_sharing_init_stack_spmd.exit: ; preds = %for.body.i.i, %__kmpc_spmd_kernel_init.exit | |
tail call void @llvm.nvvm.membar.cta() #5 | |
%div.i.i.i3 = lshr i32 %0, 5 | |
%idxprom.i.i = zext i32 %div.i.i.i3 to i64 | |
%arrayidx10.i.i = getelementptr inbounds [32 x i8], [32 x i8] addrspace(3)* @parallelLevel, i64 0, i64 %idxprom.i.i | |
%arrayidx.i.i4 = addrspacecast i8 addrspace(3)* %arrayidx10.i.i to i8* | |
%21 = load i8, i8* %arrayidx.i.i4, align 1, !tbaa !19 | |
%22 = and i8 %21, 126 | |
%cmp.i.i5 = icmp eq i8 %22, 0 | |
%cmp.i28.i.i.i = icmp eq i8 %21, -127 | |
br i1 %cmp.i28.i.i.i, label %_Z21GetNumberOfOmpThreadsb.exit.i.i.i, label %_Z21GetNumberOfOmpThreadsb.exit.thread.i.i.i | |
_Z21GetNumberOfOmpThreadsb.exit.thread.i.i.i: ; preds = %__kmpc_data_sharing_init_stack_spmd.exit | |
%23 = load %class.omptarget_nvptx_ThreadPrivateContext*, %class.omptarget_nvptx_ThreadPrivateContext** addrspacecast (%class.omptarget_nvptx_ThreadPrivateContext* addrspace(3)* @omptarget_nvptx_threadPrivateContext to %class.omptarget_nvptx_ThreadPrivateContext**), align 8, !tbaa !21 | |
%arrayidx.i2541.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %23, i64 0, i32 4, i64 %idxprom.i1.i | |
br label %if.then32.i.i.i | |
_Z21GetNumberOfOmpThreadsb.exit.i.i.i: ; preds = %__kmpc_data_sharing_init_stack_spmd.exit | |
%24 = tail call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #5, !range !18 | |
%cmp.i.i.i17 = icmp eq i32 %24, 1 | |
%25 = load %class.omptarget_nvptx_ThreadPrivateContext*, %class.omptarget_nvptx_ThreadPrivateContext** addrspacecast (%class.omptarget_nvptx_ThreadPrivateContext* addrspace(3)* @omptarget_nvptx_threadPrivateContext to %class.omptarget_nvptx_ThreadPrivateContext**), align 8, !tbaa !21 | |
%arrayidx.i25.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %25, i64 0, i32 4, i64 %idxprom.i1.i | |
br i1 %cmp.i.i.i17, label %if.then32.i.i.i, label %if.then83.i.i.i | |
if.then32.i.i.i: ; preds = %_Z21GetNumberOfOmpThreadsb.exit.i.i.i, %_Z21GetNumberOfOmpThreadsb.exit.thread.i.i.i | |
%arrayidx.i2544.i.i.i = phi i32* [ %arrayidx.i25.i.i.i, %_Z21GetNumberOfOmpThreadsb.exit.i.i.i ], [ %arrayidx.i2541.i.i.i, %_Z21GetNumberOfOmpThreadsb.exit.thread.i.i.i ] | |
%26 = phi %class.omptarget_nvptx_ThreadPrivateContext* [ %25, %_Z21GetNumberOfOmpThreadsb.exit.i.i.i ], [ %23, %_Z21GetNumberOfOmpThreadsb.exit.thread.i.i.i ] | |
store i32 33, i32* %arrayidx.i2544.i.i.i, align 4, !tbaa !39 | |
%arrayidx.i23.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %26, i64 0, i32 6, i64 %idxprom.i1.i | |
store i64 999, i64* %arrayidx.i23.i.i.i, align 8, !tbaa !40 | |
%.op = mul nuw nsw | |
%arrayidx.i18.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %26, i64 0, i32 7, i64 %idxprom.i1.i | |
store i64 %conv38.i.i.i, i64* %arrayidx.i18.i.i.i, align 8, !tbaa !40 | |
%arrayidx.i16.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %26, i64 0, i32 8, i64 %idxprom.i1.i | |
store i64 1000, i64* %arrayidx.i16.i.i.i, align 8, !tbaa !40 | |
br label %omp.dispatch.cond.i.preheader | |
if.then83.i.i.i: ; preds = %_Z21GetNumberOfOmpThreadsb.exit.i.i.i | |
store i32 35, i32* %arrayidx.i25.i.i.i, align 4, !tbaa !39 | |
%arrayidx.i14.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %25, i64 0, i32 5, i64 %idxprom.i1.i | |
store i64 2, i64* %arrayidx.i14.i.i.i, align 8, !tbaa !40 | |
%arrayidx.i12.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %25, i64 0, i32 6, i64 %idxprom.i1.i | |
store i64 999, i64* %arrayidx.i12.i.i.i, align 8, !tbaa !40 | |
%arrayidx.i.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %25, i64 0, i32 7, i64 %idxprom.i1.i | |
store i64 0, i64* %arrayidx.i.i.i.i, align 8, !tbaa !40 | |
%28 = load i8, i8* %arrayidx.i.i4, align 1, !tbaa !19 | |
%cmp.i.i5.i.i.i = icmp eq i8 %28, -127 | |
br i1 %cmp.i.i5.i.i.i, label %if.then5.i9.i.i.i, label %__kmpc_barrier.exit10.i.i.i | |
if.then5.i9.i.i.i: ; preds = %if.then83.i.i.i | |
tail call void asm sideeffect "bar.sync $0;", "r,~{memory}"(i32 0) #7, !srcloc !41 | |
br label %__kmpc_barrier.exit10.i.i.i | |
__kmpc_barrier.exit10.i.i.i: ; preds = %if.then5.i9.i.i.i, %if.then83.i.i.i | |
br i1 %cmp.i, label %if.then95.i.i.i, label %if.end97.i.i.i | |
if.then95.i.i.i: ; preds = %__kmpc_barrier.exit10.i.i.i | |
%29 = load %class.omptarget_nvptx_ThreadPrivateContext*, %class.omptarget_nvptx_ThreadPrivateContext** addrspacecast (%class.omptarget_nvptx_ThreadPrivateContext* addrspace(3)* @omptarget_nvptx_threadPrivateContext to %class.omptarget_nvptx_ThreadPrivateContext**), align 8, !tbaa !21 | |
%cnt.i.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %29, i64 0, i32 9 | |
store i64 0, i64* %cnt.i.i.i.i, align 8, !tbaa !40 | |
tail call void @llvm.nvvm.membar.cta() #5 | |
br label %if.end97.i.i.i | |
if.end97.i.i.i: ; preds = %if.then95.i.i.i, %__kmpc_barrier.exit10.i.i.i | |
%30 = load i8, i8* %arrayidx.i.i4, align 1, !tbaa !19 | |
%cmp.i.i.i.i.i = icmp eq i8 %30, -127 | |
%cmp.i.i.i.i = icmp ugt i32 %24, 1 | |
%or.cond45.i.i.i = and i1 %cmp.i.i.i.i, %cmp.i.i.i.i.i | |
br i1 %or.cond45.i.i.i, label %if.then5.i.i.i.i, label %omp.dispatch.cond.i.preheader | |
if.then5.i.i.i.i: ; preds = %if.end97.i.i.i | |
tail call void asm sideeffect "bar.sync $0;", "r,~{memory}"(i32 0) #7, !srcloc !41 | |
br label %omp.dispatch.cond.i.preheader | |
omp.dispatch.cond.i.preheader: ; preds = %if.then5.i.i.i.i, %if.end97.i.i.i, %if.then32.i.i.i | |
br label %omp.dispatch.cond.i.outer | |
omp.dispatch.cond.i.outer: ; preds = %omp.dispatch.cond.i.preheader, %omp.inner.for.body.preheader.i | |
%Count1.0.i.ph = phi i32 [ 0, %omp.dispatch.cond.i.preheader ], [ %67, %omp.inner.for.body.preheader.i ] | |
br label %omp.dispatch.cond.i | |
omp.dispatch.cond.i: ; preds = %omp.dispatch.cond.i.outer, %omp.dispatch.body.i | |
%31 = l | |
%32 = load i32, i32* %arrayidx.i.i.i1.i, align 4, !tbaa !39 | |
%33 = add i32 %32, -33 | |
%34 = icmp ult i32 %33, 2 | |
br i1 %34, label %if.then7.i.i.i, label %if.end27.i.i.i | |
if.then7.i.i.i: ; preds = %omp.dispatch.cond.i | |
%arrayidx.i19.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %31, i64 0, i32 7, i64 %idxprom.i1.i | |
%35 = load i64, i64* %arrayidx.i19.i.i.i, align 8, !tbaa !40 | |
%conv.i.i.i = trunc i64 %35 to i32 | |
%arrayidx.i17.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %31, i64 0, i32 6, i64 %idxprom.i1.i | |
%36 = load i64, i64* %arrayidx.i17.i.i.i, align 8, !tbaa !40 | |
%conv10.i.i.i = trunc i64 %36 to i32 | |
%cmp11.i.i.i = icmp sgt i32 %conv.i.i.i, %conv10.i.i.i | |
br i1 %cmp11.i.i.i, label %omp.dispatch.end.i, label %if.end13.i.i.i | |
if.end13.i.i.i: ; preds = %if.then7.i.i.i | |
%arrayidx.i15.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %31, i64 0, i32 5, i64 %idxprom.i1.i | |
%37 = load i64, i64* %arrayidx.i15.i.i.i, align 8, !tbaa !40 | |
%conv15.i.i.i = trunc i64 %37 to i32 | |
%add.i.i.i18 = add i32 %conv.i.i.i, -1 | |
%sub.i.i.i = add i32 %add.i.i.i18, %conv15.i.i.i | |
%cmp16.i.i.i = icmp sgt i32 %sub.i.i.i, %conv10.i.i.i | |
%spec.select.i.i.i = select i1 %cmp16.i.i.i, i32 %conv10.i.i.i, i32 %sub.i.i.i | |
%arrayidx.i13.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %31, i64 0, i32 8, i64 %idxprom.i1.i | |
%38 = load i64, i64* %arrayidx.i13.i.i.i, align 8, !tbaa !40 | |
%add23.i.i.i = add i64 %38, %35 | |
%sext.i.i.i = shl i64 %add23.i.i.i, 32 | |
%conv24.i.i.i = ashr exact i64 %sext.i.i.i, 32 | |
store i64 %conv24.i.i.i, i64* %arrayidx.i19.i.i.i, align 8, !tbaa !40 | |
br label %omp.dispatch.body.i | |
if.end27.i.i.i: ; preds = %omp.dispatch.cond.i | |
%arrayidx.i9.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %31, i64 0, i32 5, i64 %idxprom.i1.i | |
%39 = load i64, i64* %arrayidx.i9.i.i.i, align 8, !tbaa !40 | |
%conv31.i.i.i = trunc i64 %39 to i32 | |
%arrayidx.i7.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %31, i64 0, i32 7, i64 %idxprom.i1.i | |
%40 = load i64, i64* %arrayidx.i7.i.i.i, align 8, !tbaa !40 | |
%conv33.i.i.i = trunc i64 %40 to i32 | |
%arrayidx.i5.i.i.i = getelementptr inbounds %class.omptarget_nvptx_ThreadPrivateContext, %class.omptarget_nvptx_ThreadPrivateContext* %31, i64 0, i32 6, i64 %idxprom.i1.i | |
%41 = load i64, i64* %arrayidx.i5.i.i.i, align 8, !tbaa !40 | |
%conv35.i.i.i = trunc i64 %41 to i32 | |
%42 = tail call i32 @llvm.nvvm.vote.ballot(i1 true) #5 | |
%neg.i.i.i.i.i.i.i = sub i32 0, %42 | |
%and.i.i.i.i.i.i.i = and i32 %42, %neg.i.i.i.i.i.i.i | |
%43 = tail call i32 @llvm.ctlz.i32(i32 %and.i.i.i.i.i.i.i, i1 false) #5, !range !42 | |
%sub.i.i.i.i.i = sub nsw i32 31, %43 | |
%44 = tail call i32 asm "mov.u32 $0, %lanemask_lt;", "=r"() #6, !srcloc !43 | |
%and.i.i.i.i.i = and i32 %44, %42 | |
%45 = tail call i32 @llvm.ctpop.i32(i32 %and.i.i.i.i.i) #5, !range !42 | |
%cmp.i.i.i.i2.i = icmp eq i32 %and.i.i.i.i.i, 0 | |
br i1 %cmp.i.i.i.i2.i, label %if.then.i.i.i.i.i, label %_ZN27omptarget_nvptx_LoopSupportIiiE8NextIterEv.exit.i.i.i.i | |
if.then.i.i.i.i.i: ; preds = %if.end27.i.i. | |
%add1.i.i.i.i = add nsw i32 %add.i.i.i.i, %conv31.i.i.i | |
%cmp.i.i.i3.i = icmp sgt i32 %add.i.i.i.i, %conv35.i.i.i | |
%cmp2.i.i.i.i = icmp sgt i32 %add1.i.i.i.i, %conv35.i.i.i | |
%or.cond.i.i.i.i = or i1 %cmp.i.i.i3.i, %cmp2.i.i.i.i | |
%sub.i.i.i.i = add nsw i32 %add1.i.i.i.i, -1 | |
%sub.i.mux.i.i.i = select i1 %or.cond.i.i.i.i, i32 %conv35.i.i.i, i32 %sub.i.i.i.i | |
br i1 %cmp.i.i.i3.i, label %omp.dispatch.end.i, label %omp.dispatch.body.i | |
omp.dispatch.body.i: ; preds = %_ZN27omptarget_nvptx_LoopSupportIiiE8NextIterEv.exit.i.i.i.i, %if.end13.i.i.i | |
%.omp.ub.1.i = phi i32 [ %spec.select.i.i.i, %if.end13.i.i.i ], [ %sub.i.mux.i.i.i, %_ZN27omptarget_nvptx_LoopSupportIiiE8NextIterEv.exit.i.i.i.i ] | |
%.omp.lb.1.i = phi i32 [ %conv.i.i.i, %if.end13.i.i.i ], [ %add.i.i.i.i, %_ZN27omptarget_nvptx_LoopSupportIiiE8NextIterEv.exit.i.i.i.i ] | |
%cmp25.i = icmp sgt i32 %.omp.lb.1.i, %.omp.ub.1.i | |
br i1 %cmp25.i, label %omp.dispatch.cond.i, label %omp.inner.for.body.preheader.i | |
omp.inner.for.body.preheader.i: ; preds = %omp.dispatch.body.i | |
%54 = icmp sgt i32 %.omp.ub.1.i, %.omp.lb.1.i | |
%smax.i = select i1 %54, i32 %.omp.ub.1.i, i32 %.omp.lb.1.i | |
%55 = sub i32 %smax.i, %.omp.lb.1.i | |
%56 = add i32 %.omp.lb.1.i, 1 | |
%57 = mul i32 %55, %56 | |
%58 = xor i32 %.omp.lb.1.i, -1 | |
%59 = add i32 %smax.i, %58 | |
%60 = zext i32 %59 to i33 | |
%61 = zext i32 %55 to i33 | |
%62 = mul i33 %60, %61 | |
%63 = lshr i33 %62, 1 | |
%64 = trunc i33 %63 to i32 | |
%65 = add i32 %.omp.lb.1.i, %Count1.0.i.ph | |
%66 = add i32 %65, %57 | |
%67 = add i32 %66, %64 | |
br label %omp.dispatch.cond.i.outer | |
omp.dispatch.end.i: ; preds = %_ZN27omptarget_nvptx_LoopSupportIiiE8NextIterEv.exit.i.i.i.i, %if.then7.i.i.i | |
%68 = load i8, i8* %arrayidx.i.i4, align 1, !tbaa !19 | |
%cmp.i2.i.i.i = icmp eq i8 %68, -127 | |
br i1 %cmp.i2.i.i.i, label %if.else.i3.i.i.i, label %.omp.reduction.then.i | |
if.else.i3.i.i.i: ; preds = %omp.dispatch.end.i | |
%69 = tail call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #5, !range !18 | |
%cmp.i.i8.i = icmp eq i32 %69, 1 | |
br i1 %cmp.i.i8.i, label %.omp.reduction.then.i, label %if.end.i.i.i | |
if.end.i.i.i: ; preds = %if.else.i3.i.i.i | |
%sub.i.i9.i = add nuw nsw i32 %69, 31 | |
%div.i.i.i19 = lshr i32 %sub.i.i9.i, 5 | |
%rem.i.i.i20 = and i32 %69, 31 | |
%cmp5.i.i.i = icmp eq i32 %rem.i.i.i20, 0 | |
%sub6.i.i.i = add nsw i32 %div.i.i.i19, -1 | |
%cmp7.i.i.i = icmp ult i32 %div.i.i.i3, %sub6.i.i.i | |
%or.cond.i.i.i = or i1 %cmp5.i.i.i, %cmp7.i.i.i | |
br i1 %or.cond.i.i.i, label %if.then8.i.i.i, label %if.else.i.i.i | |
if.then8.i.i.i: | |
%add.i.i.4.i.i.i.i = add nsw i32 %74, %add.i.i.3.i.i.i.i | |
br label %if.end15.i.i.i | |
if.else.i.i.i: ; preds = %if.end.i.i.i | |
%cmp9.i.i.i = icmp ugt i32 %69, 1 | |
br i1 %cmp9.i.i.i, label %if.then10.i.i.i, label %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i | |
if.then10.i.i.i: ; preds = %if.else.i.i.i | |
%div.i5.i.i.i = lshr i32 %rem.i.i.i20, 1 | |
%cmp9.i6.i.i.i = icmp eq i32 %div.i5.i.i.i, 0 | |
br i1 %cmp9.i6.i.i.i, label %if.end15.i.i.i, label %while.body.lr.ph.i8.i.i.i | |
while.body.lr.ph.i8.i.i.i: ; preds = %if.then10.i.i.i | |
%conv.i7.i.i.i = and i16 %conv.i.i, 31 | |
br label %while.body.i14.i.i.i | |
while.body.i14.i.i.i: ; preds = %while.body.i14.i.i.i, %while.body.lr.ph.i8.i.i.i | |
%Count1.2.i = phi i32 [ %Count1.0.i.ph, %while.body.lr.ph.i8.i.i.i ], [ %storemerge.i.i.i, %while.body.i14.i.i.i ] | |
%mask.011.i9.i.i.i = phi i32 [ %div.i5.i.i.i, %while.body.lr.ph.i8.i.i.i ], [ %div3.i20.i.i.i, %while.body.i14.i.i.i ] | |
%curr_size.010.i10.i.i.i = phi i32 [ %rem.i.i.i20, %while.body.lr.ph.i8.i.i.i ], [ %div2.i19.i.i.i, %while.body.i14.i.i.i ] | |
%conv1.i11.i.i.i = trunc i32 %mask.011.i9.i.i.i to i16 | |
%sext.i12.i.i.i = shl i32 %mask.011.i9.i.i.i, 16 | |
%conv.i.i.i13.i.i.i = ashr exact i32 %sext.i12.i.i.i, 16 | |
%75 = tail call i32 @llvm.nvvm.shfl.sync.down.i32(i32 -1, i32 %Count1.2.i, i32 %conv.i.i.i13.i.i.i, i32 31) #5 | |
%76 = icmp ult i16 %conv.i7.i.i.i, %conv1.i11.i.i.i | |
%add.i.i.i15.i.i.i = select i1 %76, i32 %Count1.2.i, i32 0 | |
%storemerge.i.i.i = add nsw i32 %add.i.i.i15.i.i.i, %75 | |
%add.i18.i.i.i = add nuw i32 %curr_size.010.i10.i.i.i, 1 | |
%div2.i19.i.i.i = lshr i32 %add.i18.i.i.i, 1 | |
%div3.i20.i.i.i = lshr i32 %add.i18.i.i.i, 2 | |
%cmp.i21.i.i.i = icmp eq i32 %div3.i20.i.i.i, 0 | |
br i1 %cmp.i21.i.i.i, label %if.end15.i.i.i, label %while.body.i14.i.i.i | |
if.end15.i.i.i: ; preds = %while.body.i14.i.i.i, %if.then10.i.i.i, %if.then8.i.i.i | |
%Count1.3.i = phi i32 [ %add.i.i.4.i.i.i.i, %if.then8.i.i.i ], [ %Count1.0.i.ph, %if.then10.i.i.i ], [ %storemerge.i.i.i, %while.body.i14.i.i.i ] | |
%cmp16.i.i10.i = icmp ugt i32 %69, 32 | |
br i1 %cmp16.i.i10.i, label %__kmpc_barrier.exit16.i.i.i.i, label %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i | |
__kmpc_barrier.exit16.i.i.i.i: ; preds = %if.end15.i.i.i | |
%nvptx_lane_id.i.i.i.i = and i32 %0, 31 | |
tail call void asm sideeffect "bar.sync $0;", "r,~{memory}"(i32 0) #7, !srcloc !41 | |
%warp_master.i.i.i.i = icmp eq i32 %nvptx_lane_id.i.i.i.i, 0 | |
br i1 %warp_master.i.i.i.i, label %then.i.i.i.i, label %ifcont.i.i.i.i | |
then.i.i.i.i: ; preds = %__kmpc_barrier.exit16.i.i.i.i | |
%77 = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i64 %idxprom.i.i | |
store volatile i32 %Count1.3.i, i32 addrspace(3)* %77, align 4, !tbaa !46 | |
br label %ifcont.i.i.i.i | |
ifcont.i.i.i.i: ; preds = %then.i.i.i.i, %__kmpc_barrier.exit16.i.i.i.i | |
%78 = load i8, i8* %arrayidx.i.i4, align 1, !tbaa !19 | |
%cmp.i.i.i.i.i.i = icmp eq i8 %78, -127 | |
br i1 %cmp.i.i.i.i.i.i, label %if.then5.i.i.i.i.i, | |
__kmpc_barrier.exit.i.i.i.i: ; preds = %if.then5.i.i.i.i.i, %ifcont.i.i.i.i | |
%is_active_thread.i.i.i.i = icmp ult i32 %0, %div.i.i.i19 | |
br i1 %is_active_thread.i.i.i.i, label %then4.i.i.i.i, label %_omp_reduction_inter_warp_copy_func.exit.i.i.i | |
then4.i.i.i.i: ; preds = %__kmpc_barrier.exit.i.i.i.i | |
%79 = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i64 %idxprom.i1.i | |
%80 = load volatile i32, i32 addrspace(3)* %79, align 4, !tbaa !46 | |
br label %_omp_reduction_inter_warp_copy_func.exit.i.i.i | |
_omp_reduction_inter_warp_copy_func.exit.i.i.i: ; preds = %then4.i.i.i.i, %__kmpc_barrier.exit.i.i.i.i | |
%Count1.4.i = phi i32 [ %80, %then4.i.i.i.i ], [ %Count1.3.i, %__kmpc_barrier.exit.i.i.i.i ] | |
%cmp18.i.i.i = icmp eq i32 %div.i.i.i3, 0 | |
br i1 %cmp18.i.i.i, label %if.then19.i.i.i, label %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i | |
if.then19.i.i.i: ; preds = %_omp_reduction_inter_warp_copy_func.exit.i.i.i | |
%div.i.i.i.i = lshr i32 %sub.i.i9.i, 6 | |
%cmp9.i.i.i.i = icmp eq i32 %div.i.i.i.i, 0 | |
br i1 %cmp9.i.i.i.i, label %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i, label %while.body.i.i.i.i | |
while.body.i.i.i.i: ; preds = %if.then19.i.i.i, %while.body.i.i.i.i | |
%Count1.5.i = phi i32 [ %storemerge26.i.i.i, %while.body.i.i.i.i ], [ %Count1.4.i, %if.then19.i.i.i ] | |
%mask.011.i.i.i.i = phi i32 [ %div3.i.i.i.i, %while.body.i.i.i.i ], [ %div.i.i.i.i, %if.then19.i.i.i ] | |
%curr_size.010.i.i.i.i = phi i32 [ %div2.i.i.i.i, %while.body.i.i.i.i ], [ %div.i.i.i19, %if.then19.i.i.i ] | |
%conv1.i.i.i.i = trunc i32 %mask.011.i.i.i.i to i16 | |
%sext.i.i.i.i = shl i32 %mask.011.i.i.i.i, 16 | |
%conv.i.i.i.i.i.i = ashr exact i32 %sext.i.i.i.i, 16 | |
%81 = tail call i32 @llvm.nvvm.shfl.sync.down.i32(i32 -1, i32 %Count1.5.i, i32 %conv.i.i.i.i.i.i, i32 31) #5 | |
%82 = icmp ult i16 %conv.i.i, %conv1.i.i.i.i | |
%add.i.i.i.i.i.i = select i1 %82, i32 %Count1.5.i, i32 0 | |
%storemerge26.i.i.i = add nsw i32 %add.i.i.i.i.i.i, %81 | |
%add.i.i.i12.i = add nuw i32 %curr_size.010.i.i.i.i, 1 | |
%div2.i.i.i.i = lshr i32 %add.i.i.i12.i, 1 | |
%div3.i.i.i.i = lshr i32 %add.i.i.i12.i, 2 | |
%cmp.i1.i.i.i = icmp eq i32 %div3.i.i.i.i, 0 | |
br i1 %cmp.i1.i.i.i, label %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i, label %while.body.i.i.i.i | |
__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i: ; preds = %while.body.i.i.i.i, %if.then19.i.i.i, %_omp_reduction_inter_warp_copy_func.exit.i.i.i, %if.end15.i.i.i, %if.else.i.i.i | |
%Count1.6.i = phi i32 [ %Count1.4.i, %if.then19.i.i.i ], [ %Count1.4.i, %_omp_reduction_inter_warp_copy_func.exit.i.i.i ], [ %Count1.3.i, %if.end15.i.i.i ], [ %Count1.0.i.ph, %if.else.i.i.i ], [ %storemerge26.i.i.i, %while.body.i.i.i.i ] | |
br i1 %cmp.i, label %.omp.reduction.then.i, label %__omp_outlined__.exit | |
.omp.reduction.then.i: ; preds = %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i, %if.else.i3.i.i.i, %omp.dispatch.end.i | |
%Count1.724.i = phi i32 [ %Count1.6.i, %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i ], [ %Count1.0.i.ph, %if.else.i3.i.i.i ], [ %Count1.0.i.ph, %omp.dispatch.end.i ] | |
%83 = load i32, i32* %Count, align 4, !tbaa !46 | |
%add4.i = add nsw i32 %83, %Count1.724.i | |
store i32 %add4.i, i32* %Count, align 4, !tbaa !46 | |
br label %__omp_outlined__.exit | |
__omp_outlined__.exit: ; preds = %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i, %.omp.reduction.then.i | |
%cmp.i.pr = phi i1 [ false, %__kmpc_nvptx_parallel_reduce_nowait_v2.exit.i ], [ %cmp.i, %.omp.reduction.then.i ] | |
tail call void asm sideeffect "bar.sync $0;", "r,~{memory}"(i32 0) #7, !srcloc !50 | |
br i1 %cmp.i.pr, label %if.then1.i, label %__kmpc_spmd_ | |
%add.i.i = or i32 %87, 1 | |
%idxprom.i3.i.i10 = zext i32 %rem.i.i9 to i64 | |
%arrayidx.i4.i.i11 = getelementptr inbounds %class.omptarget_nvptx_Queue, %class.omptarget_nvptx_Queue* %arrayidx.i8, i64 0, i32 3, i64 %idxprom.i3.i.i10 | |
br label %while.cond.i.i13 | |
while.cond.i.i13: ; preds = %while.cond.i.i13, %if.then1.i | |
%88 = atomicrmw or i32* %arrayidx.i4.i.i11, i32 0 seq_cst | |
%cmp.i.i.i12 = icmp eq i32 %88, %add.i.i | |
br i1 %cmp.i.i.i12, label %_ZN21omptarget_nvptx_QueueI36omptarget_nvptx_ThreadPrivateContextLj32EE7EnqueueEPS0_.exit.i, label %while.cond.i.i13 | |
_ZN21omptarget_nvptx_QueueI36omptarget_nvptx_ThreadPrivateContextLj32EE7EnqueueEPS0_.exit.i: ; preds = %while.cond.i.i13 | |
%arrayidx.i2.i.i14 = getelementptr inbounds %class.omptarget_nvptx_Queue, %class.omptarget_nvptx_Queue* %arrayidx.i8, i64 0, i32 1, i64 %idxprom.i3.i.i10 | |
%89 = bitcast %class.omptarget_nvptx_ThreadPrivateContext** %arrayidx.i2.i.i14 to i64* | |
%90 = atomicrmw xchg i64* %89, i64 %85 seq_cst | |
%add.i.i.i15 = add nuw nsw i32 %add.i.i, 1 | |
%rem.i.i.i16 = and i32 %add.i.i.i15, 33554430 | |
%91 = atomicrmw xchg i32* %arrayidx.i4.i.i11, i32 %rem.i.i.i16 seq_cst | |
br label %__kmpc_spmd_kernel_deinit_v2.exit | |
__kmpc_spmd_kernel_deinit_v2.exit: ; preds = %__omp_outlined__.exit, %_ZN21omptarget_nvptx_QueueI36omptarget_nvptx_ThreadPrivateContextLj32EE7EnqueueEPS0_.exit.i | |
ret void | |
} | |
; Function Attrs: nounwind readnone | |
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #1 | |
; Function Attrs: nounwind readnone | |
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1 | |
; Function Attrs: nounwind | |
declare void @llvm.nvvm.membar.cta() #2 | |
; Function Attrs: nounwind readnone speculatable | |
declare i32 @llvm.ctlz.i32(i32, i1 immarg) #3 | |
; Function Attrs: nounwind readnone speculatable | |
declare i32 @llvm.ctpop.i32(i32) #3 | |
; Function Attrs: convergent inaccessiblememonly nounwind | |
declare i32 @llvm.nvvm.shfl.sync.idx.i32(i32, i32, i32, i32) #4 | |
; Function Attrs: convergent inaccessiblememonly nounwind | |
declare i32 @llvm.nvvm.vote.ballot(i1) #4 | |
; Function Attrs: convergent inaccessiblememonly nounwind | |
declare i32 @llvm.nvvm.shfl.sync.down.i32(i32, i32, i32, i32) #4 | |
attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx61,+sm_70" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #1 = { nounwind readnone } | |
attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"= | |
!8 = !{null, !"align", i32 16} | |
!9 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} | |
!10 = !{!"clang version 9.0.0 (/dev/shm/jdoerfert/llvm-project/llvm/tools/clang 3bc6e2a7aa3853b06045c42e81af094647c48676)"} | |
!11 = !{!"clang version 9.0.0 "} | |
!12 = !{i32 1, i32 2} | |
!13 = !{!14, !14, i64 0} | |
!14 = !{!"int", !15, i64 0} | |
!15 = !{!"omnipotent char", !16, i64 0} | |
!16 = !{!"Simple C++ TBAA"} | |
!17 = !{i32 0, i32 1024} | |
!18 = !{i32 1, i32 1025} | |
!19 = !{!15, !15, i64 0} | |
!20 = !{i32 1115} | |
!21 = !{!22, !22, i64 0} | |
!22 = !{!"any pointer", !15, i64 0} | |
!23 = !{!24, !15, i64 40} | |
!24 = !{!"_ZTS25omptarget_nvptx_TaskDescr", !25, i64 0, !28, i64 40, !22, i64 56} | |
!25 = !{!"_ZTSN25omptarget_nvptx_TaskDescr20SavedLoopDescr_itemsE", !26, i64 0, !26, i64 8, !26, i64 16, !26, i64 24, !27, i64 32} | |
!26 = !{!"long", !15, i64 0} | |
!27 = !{!"_ZTS11kmp_sched_t", !15, i64 0} | |
!28 = !{!"_ZTSN25omptarget_nvptx_TaskDescr15TaskDescr_itemsE", !15, i64 0, !15, i64 1, !29, i64 2, !26, i64 8} | |
!29 = !{!"short", !15, i64 0} | |
!30 = !{!24, !29, i64 42} | |
!31 = !{!24, !26, i64 48} | |
!32 = !{i32 -2144259087} | |
!33 = !{!24, !22, i64 56} | |
!34 = !{!35, !22, i64 24} | |
!35 = !{!"_ZTS38__kmpc_data_sharing_worker_slot_static", !22, i64 0, !22, i64 8, !22, i64 16, !22, i64 24, !15, i64 32} | |
!36 = !{!35, !22, i64 0} | |
!37 = !{!35, !22, i64 8} | |
!38 = !{!35, !22, i64 16} | |
!39 = !{!27, !27, i64 0} | |
!40 = !{!26, !26, i64 0} | |
!41 = !{i32 -2144261549} | |
!42 = !{i32 0, i32 33} | |
!43 = !{i32 16381} | |
!44 = !{i32 15962} | |
!45 = !{i32 16120} | |
!46 = !{!47, !47, i64 0} | |
!47 = !{!"int", !48, i64 0} | |
!48 = !{!"omnipotent char", !49, i64 0} | |
!49 = !{!"Simple C/C++ TBAA"} | |
!50 = !{i32 -2144258395} | |
; __CLANG_OFFLOAD_BUNDLE____END__ openmp-nvptx64-nvida-cuda | |
; __CLANG_OFFLOAD_BUNDLE____START__ host-x86_64-unknown-linux-gnu | |
; ModuleID = '/tmp/jdoerfert/test-a3d979.bc' | |
source_filename = "test.c" | |
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | |
target triple = "x86_64-unknown-linux-gnu" | |
%struct.ident_t = type { i32, i32, i32, i32, i8* } | |
%struct.__tgt_offload_entry = type { i8*, i8*, i64, i32, i32 } | |
%struct.__tgt_device_image = type { i8*, i8*, %struct.__tgt_offload_entry*, %struct.__tgt_offload_entry* } | |
%struct.__tgt_bin_desc = type { i32, %struct.__tgt_device_image*, %struct.__tgt_offload_entry*, %struct.__tgt_offload_entry* } | |
$.omp_offloading.descriptor_reg.nvptx64-nvida-cuda = comdat any | |
@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 | |
@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 | |
@.gomp_critical_user_.reduction.var = common global [8 x i32] zeroinitializer | |
@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 18, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 | |
@.__omp_offloading_10_1012dd1_main_l7.region_id = weak constant i8 0 | |
@.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4] | |
@.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547] | |
@.str.1 = private unnamed_addr constant [16 x i8] c"ERROR [@%i] %i\0A\00", align 1 | |
@.omp_offloading.entry_name = internal unnamed_addr constant [36 x i8] c"__omp_offloading_10_1012dd1_main_l7\00" | |
@.omp_offloading.entry.__omp_offloading_10_1012dd1_main_l7 = weak local_unnamed_addr constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_10_1012dd1_main_l7.region_id, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.omp_offloading.entry_name, i32 0, i32 0), i64 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 | |
@.omp_offloading.entries_begin = external constant %struct.__tgt_offload_entry | |
@.omp_offloading.entries_end = external constant %struct.__tgt_offload_entry | |
@.omp_offloading.img_start.nvptx64-nvida-cuda = extern_weak constant i8 | |
@.omp_offloading.img_end.nvptx64-nvida-cuda = extern_weak constant i8 | |
@.omp_offloading.device_images = internal unnamed_addr constant [1 x %struct.__tgt_device_image] [%struct.__tgt_device_image { i8* @.omp_offloading.img_start.nvptx64-nvida-cuda, i8* @.omp_offloading.img_end.nvptx64-nvida-cuda, %struct.__tgt_offload_entry* @.omp_offloading.entries_begin, %struct.__tgt_offload_entry* @.omp_offloading.entries_end }], comdat($.omp_offloading.descriptor_reg.nvptx64-nvida-cuda), align 8 | |
@.omp_offloading.descriptor = internal constant %struct.__tgt_bin_desc { i32 1, %struct.__tgt_device_image* getelementptr inbounds ([1 x %struct.__tgt_device_image], [1 x %struct.__tgt_device_image]* @.omp_offloading.device_images, i32 0, i32 0), %struct.__tgt_offload_entry* @.omp_offloading.entries_begin, %struct.__tgt_offload_entry* @.omp_offloading.entries_end }, comdat($.omp_offloading.descriptor_reg.nvptx64-nvida-cuda), align 8 | |
@__dso_handle = external hidden global i8 | |
@llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @.omp_offloading.requires_reg, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @.omp_offloading.descriptor_reg.nvptx64-nvida-cuda, i8* bitcast (void ()* @.omp_offloading.descriptor_reg.nvptx64-nvida-cuda to i8*) }] | |
; Function Attrs: nounwind uwtable | |
; Host entry point. Runs a counted loop (%i.05 goes 0, 1, ... and the loop
; exits once %inc reaches 1000); every iteration zeroes the stack slot %Count
; and publishes its address through the first element of %.offload_baseptrs.
; Always returns 0.
;
; NOTE(review): this listing looks truncated in the middle of the function.
; %for.body has no terminator in the visible text, and both %8 and the block
; %omp_offload.cont are referenced below without a visible definition. The
; offload call and the comparison guarding %if.then are presumably in the
; missing part -- confirm against the full IR.
define dso_local i32 @main() local_unnamed_addr #0 { | |
entry: | |
; Stack storage: the reduction target plus two one-element pointer arrays.
%Count = alloca i32, align 4 | |
%.offload_baseptrs = alloca [1 x i8*], align 8 | |
%.offload_ptrs = alloca [1 x i8*], align 8 | |
; %0 is the i8* view of %Count used by the lifetime markers.
%0 = bitcast i32* %Count to i8* | |
; %1..%4 are differently typed views of element 0 of the two pointer arrays.
; Only %2 is used in the visible text.
%1 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs, i64 0, i64 0 | |
%2 = bitcast [1 x i8*]* %.offload_baseptrs to i32** | |
%3 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i64 0, i64 0 | |
%4 = bitcast [1 x i8*]* %.offload_ptrs to i32** | |
br label %for.body | |
for.cond.cleanup: ; preds = %if.end | |
ret i32 0 | |
for.body: ; preds = %if.end, %entry | |
; Loop counter: 0 on entry, %inc on the back edge from %if.end.
%i.05 = phi i32 [ 0, %entry ], [ %inc, %if.end ] | |
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4 | |
; Reset %Count for this iteration, then store its address into
; %.offload_baseptrs[0] (written through the i32** view %2).
store i32 0, i32* %Count, align 4, !tbaa !3 | |
store i32* %Count, i32** %2, align 8 | |
; NOTE(review): remainder of %for.body and all of %omp_offload.cont are
; missing from this listing.
if.then: ; preds = %omp_offload.cont | |
; Prints @.str.1 ("ERROR [@%i] %i\n") with the loop counter and %8.
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.1, i64 0, i64 0), i32 %i.05, i32 %8) | |
br label %if.end | |
if.end: ; preds = %omp_offload.cont, %if.then | |
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4 | |
; Advance the counter and leave the loop after the 1000th iteration.
%inc = add nuw nsw i32 %i.05, 1 | |
%exitcond = icmp eq i32 %inc, 1000 | |
br i1 %exitcond, label %for.cond.cleanup, label %for.body | |
} | |
; Function Attrs: argmemonly nounwind | |
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 | |
; Function Attrs: norecurse nounwind uwtable | |
; Outlined OpenMP region body.
;
; Parameters:
;   %.global_tid. - pointer to the thread id; loaded once and passed to every
;                   __kmpc_* call below.
;   %.bound_tid.  - unused (marked readnone).
;   %Count        - shared i32 that the private accumulator %Count1 is
;                   combined into at the end.
;
; Visible flow: initialise the loop bookkeeping slots, register the iteration
; range with __kmpc_dispatch_init_4, then repeatedly ask
; __kmpc_dispatch_next_4 for work until it returns 0, and finally add the
; private result into %Count.
;
; NOTE(review): this listing is truncated inside %omp.dispatch.body (the line
; "%9 = load i32," is cut off). The inner loop, the block %omp.dispatch.end,
; the head of %.omp.reduction.case1 and the definition of %add4 are not
; visible; %.omp.reduction.red_list is only used in the missing part.
define internal void @.omp_outlined.(i32* noalias nocapture readonly %.global_tid., i32* noalias nocapture readnone %.bound_tid., i32* nocapture dereferenceable(4) %Count) #2 { | |
entry: | |
%.omp.lb = alloca i32, align 4 | |
%.omp.ub = alloca i32, align 4 | |
%.omp.stride = alloca i32, align 4 | |
%.omp.is_last = alloca i32, align 4 | |
%Count1 = alloca i32, align 4 | |
%.omp.reduction.red_list = alloca [1 x i8*], align 8 | |
; Initial values: lb = 0, ub = 999, stride = 1, is_last = 0, Count1 = 0.
%0 = bitcast i32* %.omp.lb to i8* | |
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4 | |
store i32 0, i32* %.omp.lb, align 4, !tbaa !3 | |
%1 = bitcast i32* %.omp.ub to i8* | |
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #4 | |
store i32 999, i32* %.omp.ub, align 4, !tbaa !3 | |
%2 = bitcast i32* %.omp.stride to i8* | |
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #4 | |
store i32 1, i32* %.omp.stride, align 4, !tbaa !3 | |
%3 = bitcast i32* %.omp.is_last to i8* | |
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %3) #4 | |
store i32 0, i32* %.omp.is_last, align 4, !tbaa !3 | |
%4 = bitcast i32* %Count1 to i8* | |
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %4) #4 | |
store i32 0, i32* %Count1, align 4, !tbaa !3 | |
%5 = load i32, i32* %.global_tid., align 4, !tbaa !3 | |
; NOTE(review): the trailing integer arguments are presumably
; (schedule = 35, lb = 0, ub = 999, stride = 1, chunk = 2), with 35 being
; kmp_sch_dynamic_chunked -- confirm against the OpenMP runtime's kmp.h.
tail call void @__kmpc_dispatch_init_4(%struct.ident_t* nonnull @0, i32 %5, i32 35, i32 0, i32 999, i32 1, i32 2) #4 | |
; First request for work; a zero result skips straight to %omp.dispatch.end.
%6 = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* nonnull @0, i32 %5, i32* nonnull %.omp.is_last, i32* nonnull %.omp.lb, i32* nonnull %.omp.ub, i32* nonnull %.omp.stride) #4 | |
%tobool12 = icmp eq i32 %6, 0 | |
br i1 %tobool12, label %omp.dispatch.end, label %omp.dispatch.body | |
omp.dispatch.cond.loopexit: ; preds = %omp.inner.for.body.lr.ph, %omp.dispatch.body | |
; Subsequent requests: same call, same exit test.
%7 = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* nonnull @0, i32 %5, i32* nonnull %.omp.is_last, i32* nonnull %.omp.lb, i32* nonnull %.omp.ub, i32* nonnull %.omp.stride) #4 | |
%tobool = icmp eq i32 %7, 0 | |
br i1 %tobool, label %omp.dispatch.end, label %omp.dispatch.body | |
omp.dispatch.body: ; preds = %entry, %omp.dispatch.cond.loopexit | |
; Re-read the lower bound that the dispatch call was given a pointer to.
%8 = load i32, i32* %.omp.lb, align 4, !tbaa !3 | |
; NOTE(review): the listing is cut off here; the next visible line belongs to
; a later block (presumably %.omp.reduction.case1).
%9 = load i32, | |
; Writes the combined value %add4 (defined in the missing part) to %Count and
; then calls __kmpc_end_reduce_nowait.
store i32 %add4, i32* %Count, align 4, !tbaa !3 | |
call void @__kmpc_end_reduce_nowait(%struct.ident_t* nonnull @1, i32 %5, [8 x i32]* nonnull @.gomp_critical_user_.reduction.var) #4 | |
br label %.omp.reduction.default | |
.omp.reduction.case2: ; preds = %omp.dispatch.end | |
; Alternative combine path: atomically add the private accumulator to %Count.
%28 = load i32, i32* %Count1, align 4, !tbaa !3 | |
%29 = atomicrmw add i32* %Count, i32 %28 monotonic | |
br label %.omp.reduction.default | |
.omp.reduction.default: ; preds = %.omp.reduction.case2, %.omp.reduction.case1, %omp.dispatch.end | |
; Common exit: end the lifetimes in reverse order of their start.
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %4) #4 | |
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %3) #4 | |
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #4 | |
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #4 | |
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4 | |
ret void | |
} | |
declare dso_local void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) local_unnamed_addr | |
declare dso_local i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) local_unnamed_addr | |
; Function Attrs: norecurse nounwind uwtable | |
; Pairwise combiner for a single i32 sum.
;
; Both arguments are opaque i8* pointers. Each is reinterpreted as a pointer
; to an i32* slot, and the i32* stored in that slot is loaded. The integer
; reached through the second argument (%1) is added to the integer reached
; through the first argument (%0), and the sum is stored back through the
; first. In C terms: **(int **)arg0 += **(int **)arg1.
; The add is `nsw`, so signed overflow yields poison.
define internal void @.omp.reduction.reduction_func(i8* nocapture readonly, i8* nocapture readonly) #2 { | |
entry: | |
; %3 = right-hand operand pointer, fetched from slot 0 of the second argument.
%2 = bitcast i8* %1 to i32** | |
%3 = load i32*, i32** %2, align 8 | |
; %5 = left-hand operand / destination pointer, fetched from the first argument.
%4 = bitcast i8* %0 to i32** | |
%5 = load i32*, i32** %4, align 8 | |
; Load both operands, add, and write the result to the destination.
%6 = load i32, i32* %5, align 4, !tbaa !3 | |
%7 = load i32, i32* %3, align 4, !tbaa !3 | |
%add = add nsw i32 %7, %6 | |
store i32 %add, i32* %5, align 4, !tbaa !3 | |
ret void | |
} | |
declare dso_local i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) local_unnamed_addr | |
declare dso_local void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) local_unnamed_addr | |
; Function Attrs: argmemonly nounwind | |
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 | |
declare dso_local i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr | |
declare dso_local void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) local_unnamed_addr | |
declare !callback !8 dso_local void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr | |
declare dso_local i32 @__tgt_target_teams(i64, i8*, i32, i8**, i8**, i64*, i64*, i32, i32) local_unnamed_addr | |
; Function Attrs: nounwind | |
declare dso_local i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr #3 | |
; Function Attrs: nounwind uwtable | |
; Teardown hook: passes @.omp_offloading.descriptor to __tgt_unregister_lib.
; The i8* argument is ignored (readnone) and the i32 result of the call is
; discarded. This function is the callback handed to __cxa_atexit by
; @.omp_offloading.descriptor_reg.nvptx64-nvida-cuda.
define internal void @.omp_offloading.descriptor_unreg(i8* nocapture readnone) #0 section ".text.startup" comdat($.omp_offloading.descriptor_reg.nvptx64-nvida-cuda) { | |
entry: | |
%1 = tail call i32 @__tgt_unregister_lib(%struct.__tgt_bin_desc* nonnull @.omp_offloading.descriptor) #4 | |
ret void | |
} | |
declare dso_local i32 @__tgt_unregister_lib(%struct.__tgt_bin_desc*) local_unnamed_addr | |
; Function Attrs: nounwind uwtable | |
; Startup hook (listed in @llvm.global_ctors): passes
; @.omp_offloading.descriptor to __tgt_register_lib, then registers
; @.omp_offloading.descriptor_unreg with __cxa_atexit, using the descriptor
; address as the callback argument and @__dso_handle as the DSO handle.
; The i32 results of both calls are discarded.
define linkonce hidden void @.omp_offloading.descriptor_reg.nvptx64-nvida-cuda() #0 section ".text.startup" comdat { | |
entry: | |
%0 = tail call i32 @__tgt_register_lib(%struct.__tgt_bin_desc* nonnull @.omp_offloading.descriptor) #4 | |
%1 = tail call i32 @__cxa_atexit(void (i8*)* nonnull @.omp_offloading.descriptor_unreg, i8* bitcast (%struct.__tgt_bin_desc* @.omp_offloading.descriptor to i8*), i8* nonnull @__dso_handle) #4 | |
ret void | |
} | |
declare dso_local i32 @__tgt_register_lib(%struct.__tgt_bin_desc*) local_unnamed_addr | |
; Function Attrs: nounwind | |
declare dso_local i32 @__cxa_atexit(void (i8*)*, i8*, i8*) local_unnamed_addr #4 | |
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #1 = { argmemonly nounwind } | |
attributes #2 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #4 = { nounwind } | |
!omp_offload.info = !{!0} | |
!llvm.module.flags = !{!1} | |
!llvm.ident = !{!2} | |
!0 = !{i32 0, i32 16, i32 16854481, !"main", i32 7, i32 0} | |
!1 = !{i32 1, !"wchar_size", i32 4} | |
!2 = !{!"clang version 9.0.0 (/dev/shm/jdoerfert/llvm-project/llvm/tools/clang 3bc6e2a7aa3853b06045c42e81af094647c48676)"} | |
!3 = !{!4, !4, i64 0} | |
!4 = !{!"int", !5, i64 0} | |
!5 = !{!"omnipotent char", !6, i64 0} | |
!6 = !{!"Simple C/C++ TBAA"} | |
!7 = distinct !{} | |
!8 = !{!9} | |
!9 = !{i64 2, i64 -1, i64 -1, i1 true} | |
; __CLANG_OFFLOAD_BUNDLE____END__ host-x86_64-unknown-linux-gnu |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment