-
-
Save Commaster/c8cb1928c22a2b77e358c48c01eb5163 to your computer and use it in GitHub Desktop.
[MCE] test case
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Module header: data layout string and target triple for x86_64 Linux.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | |
target triple = "x86_64-unknown-linux-gnu" | |
; Aggregate types that appear in the kernel signature and in its GEPs.
;   FOUR_VECTOR : four floats (16 bytes, copied as a unit via memcpy in the kernel)
;   par_str     : a single float
;   dim_str     : the kernel only reads field 4 (i64)
;   box_str     : the kernel reads field 4 (i64), field 5 (i32) and field 6 ([26 x nei_str])
;   nei_str     : the kernel only reads field 3 (i32)
%struct.FOUR_VECTOR = type { float, float, float, float } | |
%struct.par_str = type { float } | |
%struct.dim_str = type { i32, i32, i32, i32, i64, i64, i64, i64, i64 } | |
%struct.box_str = type { i32, i32, i32, i32, i64, i32, [26 x %struct.nei_str] } | |
%struct.nei_str = type { i32, i32, i32, i32, i64 } | |
; One COMDAT group per staging array below (selection kind: any).
$_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared = comdat any | |
$_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared = comdat any | |
$_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9qB_shared = comdat any | |
; Zero-initialised, 100-element staging arrays used by the kernel:
;   rA_shared / rB_shared : [100 x FOUR_VECTOR]
;   qB_shared             : [100 x float]
; Each global carries the !pacxx.as.shared attachment twice; both refer to the
; empty node !0. NOTE(review): the duplicate attachment looks redundant --
; confirm whether the PACXX passes rely on it before removing one copy.
@_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared = linkonce_odr local_unnamed_addr global [100 x %struct.FOUR_VECTOR] zeroinitializer, comdat, align 16, !pacxx.as.shared !0, !pacxx.as.shared !0 | |
@_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared = linkonce_odr local_unnamed_addr global [100 x %struct.FOUR_VECTOR] zeroinitializer, comdat, align 16, !pacxx.as.shared !0, !pacxx.as.shared !0 | |
@_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9qB_shared = linkonce_odr local_unnamed_addr global [100 x float] zeroinitializer, comdat, align 16, !pacxx.as.shared !0, !pacxx.as.shared !0 | |
; Kernel entry point (function attachment !pacxx.kernel !8 = "genericKernel").
;
; Arguments, as the body uses them:
;   %callable.coerce0  par_str; its single float p is turned into the scale 2*p*p
;   %callable.coerce1  dim_str; only field 4 (i64) is read, as an upper bound on ctaid.x
;   %callable.coerce2  box_str array; indexed by ctaid.x and by neighbour numbers read from it
;   %callable.coerce3  FOUR_VECTOR array; read-only source for the rA/rB staging copies
;   %callable.coerce4  float array; read-only source for the qB staging copies
;   %callable.coerce5  FOUR_VECTOR array; accumulated in place (load / fadd / store)
;   %name              never used (readnone)
;
; Control-flow outline:
;   1. Return immediately unless ctaid.x < dim field 4.
;   2. Copy elements tid, tid+128, ... (< 100) of coerce3[offset + i] into rA_shared[i].
;   3. For k = 0 .. nn (nn = box field 5, re-read every trip):
;        a. pick the offset of the current box (k == 0) or of neighbour k-1,
;        b. copy that box's FOUR_VECTORs / floats into rB_shared / qB_shared,
;        c. for each owned index i, run 100 inner iterations over rB/qB and
;           accumulate into coerce5[offset + i].
;      Calls to @llvm.pacxx.barrier0 separate the copy and compute phases.
; The stride-128 copy loops appear in unrolled form (remainder "prol" blocks plus
; a 4x or 2x main body); the value names suggest this came from LLVM's loop unroller.
; Function Attrs: noinline nounwind uwtable | |
define void @"_ZN5pacxx2v213genericKernelIZ25kernel_gpu_opencl_wrapper7par_str7dim_strP7box_strP11FOUR_VECTORPfS7_E3$_0EEvT_PPKc"(%struct.par_str %callable.coerce0, %struct.dim_str %callable.coerce1, %struct.box_str* nocapture readonly %callable.coerce2, %struct.FOUR_VECTOR* nocapture readonly %callable.coerce3, float* nocapture readonly %callable.coerce4, %struct.FOUR_VECTOR* nocapture %callable.coerce5, i8** nocapture readnone %name) local_unnamed_addr #1 !pacxx.kernel !8 { | |
; --- Guard: only proceed when sext(ctaid.x) is below dim_str field 4. ---
entry: | |
%callable.coerce0.fca.0.extract = extractvalue %struct.par_str %callable.coerce0, 0 | |
%callable.coerce1.fca.4.extract = extractvalue %struct.dim_str %callable.coerce1, 4 | |
%0 = call i32 @llvm.pacxx.read.ctaid.x() #6 | |
%conv.i.i = sext i32 %0 to i64 | |
%cmp.i.i = icmp sgt i64 %callable.coerce1.fca.4.extract, %conv.i.i | |
br i1 %cmp.i.i, label %if.then.i.i, label %"_ZN5pacxx2v210kernelBodyIRZ25kernel_gpu_opencl_wrapper7par_str7dim_strP7box_strP11FOUR_VECTORPfS7_E3$_0EEvOT_.exit" | |
; --- Per-box setup: scale = 2*p*p, %1 = box[ctaid].field4 (i64 offset), %2 = tid.x. ---
; %cmp5234.i.i (tid < 100) is reused further down to gate every per-index loop.
if.then.i.i: ; preds = %entry | |
%mul.i.i = fmul float %callable.coerce0.fca.0.extract, 2.000000e+00 | |
%mul3.i.i = fmul float %callable.coerce0.fca.0.extract, %mul.i.i | |
%offset.i.i = getelementptr inbounds %struct.box_str, %struct.box_str* %callable.coerce2, i64 %conv.i.i, i32 4 | |
%1 = load i64, i64* %offset.i.i, align 8, !tbaa !9 | |
%2 = call i32 @llvm.pacxx.read.tid.x() #6 | |
%cmp5234.i.i = icmp slt i32 %2, 100 | |
br i1 %cmp5234.i.i, label %while.body.lr.ph.i.i, label %while.end.i.i | |
; --- rA copy loop preheader. ---
; %4 = low 32 bits of the offset, sign-extended (shl 32 / ashr 32).
; %7 = 127 - tid + smax(tid, -28); %9 = (%7 >> 7) + 1 is the number of stride-128
; iterations. %9 & 3 selects how many remainder iterations run before the 4x body.
while.body.lr.ph.i.i: ; preds = %if.then.i.i | |
%3 = sext i32 %2 to i64 | |
%sext250.i.i = shl i64 %1, 32 | |
%4 = ashr exact i64 %sext250.i.i, 32 | |
%5 = icmp sgt i64 %3, -28 | |
%smax251.i.i = select i1 %5, i64 %3, i64 -28 | |
%6 = sub nsw i64 127, %3 | |
%7 = add nsw i64 %6, %smax251.i.i | |
%8 = lshr i64 %7, 7 | |
%9 = add nuw nsw i64 %8, 1 | |
%xtraiter252.i.i = and i64 %9, 3 | |
%lcmp.mod253.i.i = icmp eq i64 %xtraiter252.i.i, 0 | |
br i1 %lcmp.mod253.i.i, label %while.body.prol.loopexit.i.i, label %while.body.prol.preheader.i.i | |
while.body.prol.preheader.i.i: ; preds = %while.body.lr.ph.i.i | |
br label %while.body.prol.i.i | |
; Remainder loop: copies 16 bytes coerce3[i + %4] -> rA_shared[i], i += 128,
; counting %prol.iter down from %xtraiter to 0 (loop metadata !18 disables unrolling).
while.body.prol.i.i: ; preds = %while.body.prol.i.i.while.body.prol.i.i_crit_edge, %while.body.prol.preheader.i.i | |
%indvars.iv245.prol.i.i = phi i64 [ %3, %while.body.prol.preheader.i.i ], [ %indvars.iv.next246.prol.i.i, %while.body.prol.i.i.while.body.prol.i.i_crit_edge ] | |
%prol.iter.i.i = phi i64 [ %xtraiter252.i.i, %while.body.prol.preheader.i.i ], [ %prol.iter.sub.i.i, %while.body.prol.i.i.while.body.prol.i.i_crit_edge ] | |
%10 = add nsw i64 %indvars.iv245.prol.i.i, %4 | |
%arrayidx7.prol.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %10 | |
%arrayidx9.prol.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv245.prol.i.i | |
%11 = bitcast %struct.FOUR_VECTOR* %arrayidx9.prol.i.i to i8* | |
%12 = bitcast %struct.FOUR_VECTOR* %arrayidx7.prol.i.i to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %11, i8* %12, i64 16, i32 4, i1 false), !tbaa.struct !15 | |
%indvars.iv.next246.prol.i.i = add nsw i64 %indvars.iv245.prol.i.i, 128 | |
%prol.iter.sub.i.i = add i64 %prol.iter.i.i, -1 | |
%prol.iter.cmp.i.i = icmp eq i64 %prol.iter.sub.i.i, 0 | |
br i1 %prol.iter.cmp.i.i, label %while.body.prol.loopexit.i.i, label %while.body.prol.i.i.while.body.prol.i.i_crit_edge, !llvm.loop !18 | |
while.body.prol.i.i.while.body.prol.i.i_crit_edge: ; preds = %while.body.prol.i.i | |
br label %while.body.prol.i.i | |
; %7 < 384 (unsigned) means (%7 >> 7) < 3, i.e. fewer than four iterations in
; total, so the remainder loop already handled all of them and the 4x body is skipped.
while.body.prol.loopexit.i.i: ; preds = %while.body.prol.i.i, %while.body.lr.ph.i.i | |
%indvars.iv245.unr.i.i = phi i64 [ %3, %while.body.lr.ph.i.i ], [ %indvars.iv.next246.prol.i.i, %while.body.prol.i.i ] | |
%13 = icmp ult i64 %7, 384 | |
br i1 %13, label %while.end.i.i, label %while.body.lr.ph.new.i.i | |
while.body.lr.ph.new.i.i: ; preds = %while.body.prol.loopexit.i.i | |
br label %while.body.i.i | |
; 4x-unrolled rA copy: elements i, i+128, i+256, i+384 per trip, then i += 512.
; The back edge is taken while i < -412, which is the same test as i + 512 < 100.
while.body.i.i: ; preds = %while.body.i.i.while.body.i.i_crit_edge, %while.body.lr.ph.new.i.i | |
%indvars.iv245.i.i = phi i64 [ %indvars.iv245.unr.i.i, %while.body.lr.ph.new.i.i ], [ %indvars.iv.next246.3.i.i, %while.body.i.i.while.body.i.i_crit_edge ] | |
%14 = add nsw i64 %indvars.iv245.i.i, %4 | |
%arrayidx7.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %14 | |
%arrayidx9.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv245.i.i | |
%15 = bitcast %struct.FOUR_VECTOR* %arrayidx9.i.i to i8* | |
%16 = bitcast %struct.FOUR_VECTOR* %arrayidx7.i.i to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %15, i8* %16, i64 16, i32 4, i1 false), !tbaa.struct !15 | |
%indvars.iv.next246.i.i = add nsw i64 %indvars.iv245.i.i, 128 | |
%17 = add nsw i64 %indvars.iv.next246.i.i, %4 | |
%arrayidx7.1.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %17 | |
%arrayidx9.1.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv.next246.i.i | |
%18 = bitcast %struct.FOUR_VECTOR* %arrayidx9.1.i.i to i8* | |
%19 = bitcast %struct.FOUR_VECTOR* %arrayidx7.1.i.i to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %18, i8* %19, i64 16, i32 4, i1 false), !tbaa.struct !15 | |
%indvars.iv.next246.1.i.i = add nsw i64 %indvars.iv245.i.i, 256 | |
%20 = add nsw i64 %indvars.iv.next246.1.i.i, %4 | |
%arrayidx7.2.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %20 | |
%arrayidx9.2.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv.next246.1.i.i | |
%21 = bitcast %struct.FOUR_VECTOR* %arrayidx9.2.i.i to i8* | |
%22 = bitcast %struct.FOUR_VECTOR* %arrayidx7.2.i.i to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %21, i8* %22, i64 16, i32 4, i1 false), !tbaa.struct !15 | |
%indvars.iv.next246.2.i.i = add nsw i64 %indvars.iv245.i.i, 384 | |
%23 = add nsw i64 %indvars.iv.next246.2.i.i, %4 | |
%arrayidx7.3.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %23 | |
%arrayidx9.3.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv.next246.2.i.i | |
%24 = bitcast %struct.FOUR_VECTOR* %arrayidx9.3.i.i to i8* | |
%25 = bitcast %struct.FOUR_VECTOR* %arrayidx7.3.i.i to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %24, i8* %25, i64 16, i32 4, i1 false), !tbaa.struct !15 | |
%cmp5.3.i.i = icmp slt i64 %indvars.iv245.i.i, -412 | |
br i1 %cmp5.3.i.i, label %while.body.i.i.while.body.i.i_crit_edge, label %while.end.i.i | |
while.body.i.i.while.body.i.i_crit_edge: ; preds = %while.body.i.i | |
%indvars.iv.next246.3.i.i = add nsw i64 %indvars.iv245.i.i, 512 | |
br label %while.body.i.i | |
; --- Barrier after the rA copy, then read nn = box[ctaid].field5 (i32). ---
; A negative nn skips the whole outer loop and returns.
while.end.i.i: ; preds = %while.body.i.i, %while.body.prol.loopexit.i.i, %if.then.i.i | |
call void @llvm.pacxx.barrier0() #6 | |
%nn.i.i = getelementptr inbounds %struct.box_str, %struct.box_str* %callable.coerce2, i64 %conv.i.i, i32 5 | |
%26 = load i32, i32* %nn.i.i, align 8, !tbaa !20 | |
%cmp14232.i.i = icmp slt i32 %26, 0 | |
br i1 %cmp14232.i.i, label %"_ZN5pacxx2v210kernelBodyIRZ25kernel_gpu_opencl_wrapper7par_str7dim_strP7box_strP11FOUR_VECTORPfS7_E3$_0EEvOT_.exit", label %for.body.lr.ph.i.i | |
; --- Outer-loop preheader: values that do not change across outer iterations. ---
; %27/%28/%31 recompute the same quantities as %3/%4/%7 above.
; %lcmp.mod.i.i: bit 7 of %31 is clear, so (%31 >> 7) + 1 is odd and the 2x-unrolled
;                rB/qB copy needs one peeled iteration first.
; %34:           %31 < 128, i.e. exactly one iteration in total, so the 2x body is skipped.
; %33, %arrayidx37.prol: destinations rB_shared[tid] / qB_shared[tid] of that peeled iteration.
for.body.lr.ph.i.i: ; preds = %while.end.i.i | |
%27 = sext i32 %2 to i64 | |
%sext.i.i = shl i64 %1, 32 | |
%28 = ashr exact i64 %sext.i.i, 32 | |
%29 = icmp sgt i64 %27, -28 | |
%smax.i.i = select i1 %29, i64 %27, i64 -28 | |
%30 = sub nsw i64 127, %27 | |
%31 = add nsw i64 %30, %smax.i.i | |
%32 = trunc i64 %31 to i8 | |
%lcmp.mod.i.i = icmp sgt i8 %32, -1 | |
%arrayidx32.prol.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %27 | |
%33 = bitcast %struct.FOUR_VECTOR* %arrayidx32.prol.i.i to i8* | |
%arrayidx37.prol.i.i = getelementptr inbounds [100 x float], [100 x float]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9qB_shared, i64 0, i64 %27 | |
%indvars.iv.next.prol.i.i = add nsw i64 %27, 128 | |
%34 = icmp ult i64 %31, 128 | |
br label %for.body.i.i | |
; --- Outer loop over k = %indvars.iv242 (0, 1, ..., nn). ---
; k == 0 reuses the offset pointer of box[ctaid]; otherwise the offset pointer
; of the box named by neighbour entry k-1 is selected below.
for.body.i.i: ; preds = %while.end140.i.i.for.body.i.i_crit_edge, %for.body.lr.ph.i.i | |
%indvars.iv242.i.i = phi i64 [ %indvars.iv.next243.i.i, %while.end140.i.i.for.body.i.i_crit_edge ], [ 0, %for.body.lr.ph.i.i ] | |
%cmp15.i.i = icmp eq i64 %indvars.iv242.i.i, 0 | |
br i1 %cmp15.i.i, label %if.end.i.i, label %if.else.i.i | |
; Neighbour case: number = box[ctaid].field6[k-1].field3; point at box[number].field4.
if.else.i.i: ; preds = %for.body.i.i | |
%35 = add nsw i64 %indvars.iv242.i.i, -1 | |
%number.i.i = getelementptr inbounds %struct.box_str, %struct.box_str* %callable.coerce2, i64 %conv.i.i, i32 6, i64 %35, i32 3 | |
%36 = load i32, i32* %number.i.i, align 4, !tbaa !21 | |
%.pre.i.i = sext i32 %36 to i64 | |
%.pre248.i.i = getelementptr inbounds %struct.box_str, %struct.box_str* %callable.coerce2, i64 %.pre.i.i, i32 4 | |
br label %if.end.i.i | |
if.end.i.i: ; preds = %if.else.i.i, %for.body.i.i | |
%offset23.pre-phi.i.i = phi i64* [ %.pre248.i.i, %if.else.i.i ], [ %offset.i.i, %for.body.i.i ] | |
br i1 %cmp5234.i.i, label %while.body27.lr.ph.i.i, label %while.end39.i.i | |
; --- rB/qB copy for the selected box; %38 = its offset (low 32 bits, sign-extended). ---
while.body27.lr.ph.i.i: ; preds = %if.end.i.i | |
%37 = load i64, i64* %offset23.pre-phi.i.i, align 8, !tbaa !9 | |
%sext249.i.i = shl i64 %37, 32 | |
%38 = ashr exact i64 %sext249.i.i, 32 | |
br i1 %lcmp.mod.i.i, label %while.body27.prol.i.i, label %while.body27.prol.loopexit.i.i | |
; Peeled iteration: coerce3[%38 + tid] -> rB_shared[tid], coerce4[%38 + tid] -> qB_shared[tid].
while.body27.prol.i.i: ; preds = %while.body27.lr.ph.i.i | |
%39 = add nsw i64 %38, %27 | |
%arrayidx30.prol.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %39 | |
%40 = bitcast %struct.FOUR_VECTOR* %arrayidx30.prol.i.i to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %33, i8* %40, i64 16, i32 4, i1 false), !tbaa.struct !15 | |
%arrayidx35.prol.i.i = getelementptr inbounds float, float* %callable.coerce4, i64 %39 | |
%41 = load float, float* %arrayidx35.prol.i.i | |
store float %41, float* %arrayidx37.prol.i.i | |
br label %while.body27.prol.loopexit.i.i | |
while.body27.prol.loopexit.i.i: ; preds = %while.body27.prol.i.i, %while.body27.lr.ph.i.i | |
%indvars.iv.unr.ph.i.i = phi i64 [ %indvars.iv.next.prol.i.i, %while.body27.prol.i.i ], [ %27, %while.body27.lr.ph.i.i ] | |
br i1 %34, label %while.end39.i.i, label %while.body27.lr.ph.new.i.i | |
while.body27.lr.ph.new.i.i: ; preds = %while.body27.prol.loopexit.i.i | |
br label %while.body27.i.i | |
; 2x-unrolled rB/qB copy: elements i and i+128 per trip, then i += 256.
; The back edge is taken while i < -156, which is the same test as i + 256 < 100.
while.body27.i.i: ; preds = %while.body27.i.i.while.body27.i.i_crit_edge, %while.body27.lr.ph.new.i.i | |
%indvars.iv.i.i = phi i64 [ %indvars.iv.unr.ph.i.i, %while.body27.lr.ph.new.i.i ], [ %indvars.iv.next.1.i.i, %while.body27.i.i.while.body27.i.i_crit_edge ] | |
%42 = add nsw i64 %indvars.iv.i.i, %38 | |
%arrayidx30.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %42 | |
%arrayidx32.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %indvars.iv.i.i | |
%43 = bitcast %struct.FOUR_VECTOR* %arrayidx32.i.i to i8* | |
%44 = bitcast %struct.FOUR_VECTOR* %arrayidx30.i.i to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %43, i8* %44, i64 16, i32 4, i1 false), !tbaa.struct !15 | |
%arrayidx35.i.i = getelementptr inbounds float, float* %callable.coerce4, i64 %42 | |
%45 = load float, float* %arrayidx35.i.i | |
%arrayidx37.i.i = getelementptr inbounds [100 x float], [100 x float]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9qB_shared, i64 0, i64 %indvars.iv.i.i | |
store float %45, float* %arrayidx37.i.i | |
%indvars.iv.next.i.i = add nsw i64 %indvars.iv.i.i, 128 | |
%46 = add nsw i64 %indvars.iv.next.i.i, %38 | |
%arrayidx30.1.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %46 | |
%arrayidx32.1.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %indvars.iv.next.i.i | |
%47 = bitcast %struct.FOUR_VECTOR* %arrayidx32.1.i.i to i8* | |
%48 = bitcast %struct.FOUR_VECTOR* %arrayidx30.1.i.i to i8* | |
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %47, i8* %48, i64 16, i32 4, i1 false), !tbaa.struct !15 | |
%arrayidx35.1.i.i = getelementptr inbounds float, float* %callable.coerce4, i64 %46 | |
%49 = load float, float* %arrayidx35.1.i.i | |
%arrayidx37.1.i.i = getelementptr inbounds [100 x float], [100 x float]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9qB_shared, i64 0, i64 %indvars.iv.next.i.i | |
store float %49, float* %arrayidx37.1.i.i | |
%cmp26.1.i.i = icmp slt i64 %indvars.iv.i.i, -156 | |
br i1 %cmp26.1.i.i, label %while.body27.i.i.while.body27.i.i_crit_edge, label %while.end39.i.i | |
while.body27.i.i.while.body27.i.i_crit_edge: ; preds = %while.body27.i.i | |
%indvars.iv.next.1.i.i = add nsw i64 %indvars.iv.i.i, 256 | |
br label %while.body27.i.i | |
; --- Barrier after the rB/qB copy. ---
; The phi reproduces %cmp5234.i.i: false only on the edge from %if.end.i.i, which is
; taken exactly when tid >= 100; such a caller skips the compute loop entirely.
while.end39.i.i: ; preds = %while.body27.i.i, %while.body27.prol.loopexit.i.i, %if.end.i.i | |
%cmp5234.pr.i.i = phi i1 [ false, %if.end.i.i ], [ true, %while.body27.prol.loopexit.i.i ], [ true, %while.body27.i.i ] | |
call void @llvm.pacxx.barrier0() #6 | |
br i1 %cmp5234.pr.i.i, label %while.body42.preheader.i.i, label %while.end140.i.i | |
while.body42.preheader.i.i: ; preds = %while.end39.i.i | |
br label %while.body42.i.i | |
; --- Compute loop over i = tid, tid+128, ... ---
; Sets up pointers to fields 0..2 of rA_shared[i] and to fields 0..3 of
; coerce5[i + %28], the element that the inner loop accumulates into.
while.body42.i.i: ; preds = %for.end.i.i.while.body42.i.i_crit_edge, %while.body42.preheader.i.i | |
%indvars.iv239.i.i = phi i64 [ %indvars.iv.next240.i.i, %for.end.i.i.while.body42.i.i_crit_edge ], [ %27, %while.body42.preheader.i.i ] | |
%x.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv239.i.i, i32 0 | |
%y.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv239.i.i, i32 1 | |
%z.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv239.i.i, i32 2 | |
%50 = add nsw i64 %indvars.iv239.i.i, %28 | |
%x113.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce5, i64 %50, i32 0 | |
%y121.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce5, i64 %50, i32 1 | |
%z129.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce5, i64 %50, i32 2 | |
%w137.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce5, i64 %50, i32 3 | |
br label %for.body45.i.i | |
; --- Inner loop over j = 0 .. 99 (A = rA_shared[i], B = rB_shared[j], q = qB_shared[j]). ---
;   r = (A.f0 + B.f0) - (A.f0*B.f0 + A.f1*B.f1 + A.f2*B.f2)
;   e = expf(-(scale * r)),  s = 2 * e
;   out.f0 += e * q
;   out.f1 += s * (A.f0 - B.f1) * q
;   out.f2 += s * (A.f1 - B.f2) * q
;   out.f3 += s * (A.f2 - B.f3) * q
; NOTE(review): in the three differences the A-side field index is one lower than
; the B-side index -- confirm against the source kernel that this pairing is intended.
; q and the rA fields are reloaded before each use, interleaved with the stores
; through %callable.coerce5.
for.body45.i.i: ; preds = %for.body45.i.i.for.body45.i.i_crit_edge, %while.body42.i.i | |
%indvars.iv237.i.i = phi i64 [ 0, %while.body42.i.i ], [ %indvars.iv.next238.i.i, %for.body45.i.i.for.body45.i.i_crit_edge ] | |
%51 = load float, float* %x.i.i, align 16, !tbaa !23 | |
%x50.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %indvars.iv237.i.i, i32 0 | |
%52 = load float, float* %x50.i.i, align 16, !tbaa !23 | |
%add51.i.i = fadd float %51, %52 | |
%mul58.i.i = fmul float %51, %52 | |
%53 = load float, float* %y.i.i, align 4, !tbaa !25 | |
%y63.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %indvars.iv237.i.i, i32 1 | |
%54 = load float, float* %y63.i.i, align 4, !tbaa !25 | |
%mul64.i.i = fmul float %53, %54 | |
%add65.i.i = fadd float %mul58.i.i, %mul64.i.i | |
%55 = load float, float* %z.i.i, align 8, !tbaa !26 | |
%z70.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %indvars.iv237.i.i, i32 2 | |
%56 = load float, float* %z70.i.i, align 8, !tbaa !26 | |
%mul71.i.i = fmul float %55, %56 | |
%add72.i.i = fadd float %add65.i.i, %mul71.i.i | |
%sub73.i.i = fsub float %add51.i.i, %add72.i.i | |
%mul74.i.i = fmul float %mul3.i.i, %sub73.i.i | |
%sub75.i.i = fsub float -0.000000e+00, %mul74.i.i | |
%call76.i.i = call float @expf(float %sub75.i.i) #6 | |
%mul77.i.i = fmul float %call76.i.i, 2.000000e+00 | |
%57 = load float, float* %x.i.i, align 16, !tbaa !23 | |
%58 = load float, float* %y63.i.i, align 4, !tbaa !25 | |
%sub84.i.i = fsub float %57, %58 | |
%mul87.i.i = fmul float %mul77.i.i, %sub84.i.i | |
%59 = load float, float* %y.i.i, align 4, !tbaa !25 | |
%60 = load float, float* %z70.i.i, align 8, !tbaa !26 | |
%sub94.i.i = fsub float %59, %60 | |
%mul97.i.i = fmul float %mul77.i.i, %sub94.i.i | |
%61 = load float, float* %z.i.i, align 8, !tbaa !26 | |
%w.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %indvars.iv237.i.i, i32 3 | |
%62 = load float, float* %w.i.i, align 4, !tbaa !27 | |
%sub103.i.i = fsub float %61, %62 | |
%mul106.i.i = fmul float %mul77.i.i, %sub103.i.i | |
%arrayidx108.i.i = getelementptr inbounds [100 x float], [100 x float]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9qB_shared, i64 0, i64 %indvars.iv237.i.i | |
%63 = load float, float* %arrayidx108.i.i, align 4, !tbaa !16 | |
%mul109.i.i = fmul float %call76.i.i, %63 | |
%64 = load float, float* %x113.i.i, align 4, !tbaa !23 | |
%add114.i.i = fadd float %64, %mul109.i.i | |
store float %add114.i.i, float* %x113.i.i, align 4, !tbaa !23 | |
%65 = load float, float* %arrayidx108.i.i, align 4, !tbaa !16 | |
%mul117.i.i = fmul float %mul87.i.i, %65 | |
%66 = load float, float* %y121.i.i, align 4, !tbaa !25 | |
%add122.i.i = fadd float %66, %mul117.i.i | |
store float %add122.i.i, float* %y121.i.i, align 4, !tbaa !25 | |
%67 = load float, float* %arrayidx108.i.i, align 4, !tbaa !16 | |
%mul125.i.i = fmul float %mul97.i.i, %67 | |
%68 = load float, float* %z129.i.i, align 4, !tbaa !26 | |
%add130.i.i = fadd float %68, %mul125.i.i | |
store float %add130.i.i, float* %z129.i.i, align 4, !tbaa !26 | |
%69 = load float, float* %arrayidx108.i.i, align 4, !tbaa !16 | |
%mul133.i.i = fmul float %mul106.i.i, %69 | |
%70 = load float, float* %w137.i.i, align 4, !tbaa !27 | |
%add138.i.i = fadd float %70, %mul133.i.i | |
store float %add138.i.i, float* %w137.i.i, align 4, !tbaa !27 | |
%indvars.iv.next238.i.i = add nuw nsw i64 %indvars.iv237.i.i, 1 | |
%exitcond.i.i = icmp eq i64 %indvars.iv.next238.i.i, 100 | |
br i1 %exitcond.i.i, label %for.end.i.i, label %for.body45.i.i.for.body45.i.i_crit_edge | |
for.body45.i.i.for.body45.i.i_crit_edge: ; preds = %for.body45.i.i | |
br label %for.body45.i.i | |
; Advance i by 128 while i < -28, which is the same test as i + 128 < 100.
for.end.i.i: ; preds = %for.body45.i.i | |
%cmp41.i.i = icmp slt i64 %indvars.iv239.i.i, -28 | |
br i1 %cmp41.i.i, label %for.end.i.i.while.body42.i.i_crit_edge, label %while.end140.i.i | |
for.end.i.i.while.body42.i.i_crit_edge: ; preds = %for.end.i.i | |
%indvars.iv.next240.i.i = add nsw i64 %indvars.iv239.i.i, 128 | |
br label %while.body42.i.i | |
; --- End of one outer iteration: barrier, re-read nn, continue while k < nn. ---
while.end140.i.i: ; preds = %for.end.i.i, %while.end39.i.i | |
call void @llvm.pacxx.barrier0() #6 | |
%71 = load i32, i32* %nn.i.i, align 8, !tbaa !20 | |
%72 = sext i32 %71 to i64 | |
%cmp14.i.i = icmp slt i64 %indvars.iv242.i.i, %72 | |
br i1 %cmp14.i.i, label %while.end140.i.i.for.body.i.i_crit_edge, label %"_ZN5pacxx2v210kernelBodyIRZ25kernel_gpu_opencl_wrapper7par_str7dim_strP7box_strP11FOUR_VECTORPfS7_E3$_0EEvOT_.exit" | |
while.end140.i.i.for.body.i.i_crit_edge: ; preds = %while.end140.i.i | |
%indvars.iv.next243.i.i = add nuw nsw i64 %indvars.iv242.i.i, 1 | |
br label %for.body.i.i | |
; Common exit for the early-out paths and for normal loop completion.
"_ZN5pacxx2v210kernelBodyIRZ25kernel_gpu_opencl_wrapper7par_str7dim_strP7box_strP11FOUR_VECTORPfS7_E3$_0EEvOT_.exit": ; preds = %while.end140.i.i, %while.end.i.i, %entry | |
ret void | |
; The directive below records the in-memory order of the uses of the zeroinitializer constant.
; uselistorder directives | |
uselistorder [100 x %struct.FOUR_VECTOR] zeroinitializer, { 1, 0 } | |
} | |
; External declarations used by the kernel. No bodies are visible in this file;
; the semantics of the llvm.pacxx.* functions are defined outside it.

; Returns an i32 that the kernel sign-extends and uses as its box index.
; Function Attrs: alwaysinline nounwind readnone | |
declare i32 @llvm.pacxx.read.ctaid.x() #2 | |
; Returns an i32 that the kernel uses as the first per-index element (stride 128).
; Function Attrs: alwaysinline nounwind readnone | |
declare i32 @llvm.pacxx.read.tid.x() #2 | |
; Five-operand memcpy form (dest, src, length, i32, i1); every call in this file
; passes length 16, i32 4 and i1 false.
; Function Attrs: alwaysinline argmemonly nounwind | |
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #3 | |
; Called between the copy and compute phases of the kernel; marked convergent.
; Function Attrs: alwaysinline convergent nounwind | |
declare void @llvm.pacxx.barrier0() #4 | |
; Called once per inner-loop iteration of the kernel.
; Function Attrs: alwaysinline nounwind | |
declare float @expf(float) local_unnamed_addr #5 | |
; Attribute groups. Where each one is referenced in this file:
;   #0  no visible definition, declaration or call site references it
;   #1  the kernel definition
;   #2  @llvm.pacxx.read.ctaid.x and @llvm.pacxx.read.tid.x
;   #3  @llvm.memcpy.p0i8.p0i8.i64
;   #4  @llvm.pacxx.barrier0
;   #5  @expf
;   #6  call sites inside the kernel (ctaid/tid reads, barriers, expf)
; Groups #0-#5 carry the same "target-cpu"="haswell" and "target-features" string.
attributes #0 = { "target-cpu"="haswell" "target-features"="+sse2,+cx16,-tbm,-avx512ifma,-gfni,-sha,-fma4,-vpclmulqdq,-prfchw,+bmi2,-xsavec,+fsgsbase,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3" } | |
attributes #1 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+sse2,+cx16,-tbm,-avx512ifma,-gfni,-sha,-fma4,-vpclmulqdq,-prfchw,+bmi2,-xsavec,+fsgsbase,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #2 = { alwaysinline nounwind readnone "target-cpu"="haswell" "target-features"="+sse2,+cx16,-tbm,-avx512ifma,-gfni,-sha,-fma4,-vpclmulqdq,-prfchw,+bmi2,-xsavec,+fsgsbase,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3" } | |
attributes #3 = { alwaysinline argmemonly nounwind "target-cpu"="haswell" "target-features"="+sse2,+cx16,-tbm,-avx512ifma,-gfni,-sha,-fma4,-vpclmulqdq,-prfchw,+bmi2,-xsavec,+fsgsbase,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3" } | |
attributes #4 = { alwaysinline convergent nounwind "target-cpu"="haswell" "target-features"="+sse2,+cx16,-tbm,-avx512ifma,-gfni,-sha,-fma4,-vpclmulqdq,-prfchw,+bmi2,-xsavec,+fsgsbase,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3" } | |
attributes #5 = { alwaysinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+sse2,+cx16,-tbm,-avx512ifma,-gfni,-sha,-fma4,-vpclmulqdq,-prfchw,+bmi2,-xsavec,+fsgsbase,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #6 = { nounwind } | |
; Named metadata: producer identification, the list of kernel functions, and module flags.
!llvm.ident = !{!1, !2, !2} | |
!pacxx.kernel = !{!3} | |
!llvm.module.flags = !{!4} | |
; !0: empty node, the operand of every !pacxx.as.shared attachment on the staging globals.
!0 = !{} | |
!1 = !{!"PACXX"} | |
!2 = !{!"clang version 6.0.1"} | |
; !3: the single kernel listed under !pacxx.kernel.
!3 = !{void (%struct.par_str, %struct.dim_str, %struct.box_str*, %struct.FOUR_VECTOR*, float*, %struct.FOUR_VECTOR*, i8**)* @"_ZN5pacxx2v213genericKernelIZ25kernel_gpu_opencl_wrapper7par_str7dim_strP7box_strP11FOUR_VECTORPfS7_E3$_0EEvT_PPKc"} | |
!4 = !{i32 1, !"wchar_size", i32 4} | |
; !5, !6 and !7 are not referenced by any instruction or named node visible in this file.
!5 = !{null} | |
!6 = distinct !{!6, !7} | |
!7 = !{!"llvm.loop.vectorize.enable", i1 false} | |
; !8: operand of the !pacxx.kernel attachment on the kernel definition.
!8 = !{!"genericKernel"} | |
; TBAA access tags and type descriptors.
;   !9  : i64 member of box_str at byte offset 16 (loads of the offset field)
;   !20 : i32 member of box_str at byte offset 24 (loads of the nn field)
;   !21 : i32 member of nei_str at byte offset 12 (load of the neighbour number)
;   !23, !25, !26, !27 : float members of FOUR_VECTOR at byte offsets 0, 4, 8, 12
;   !16 : scalar float access (loads from qB_shared)
!9 = !{!10, !14, i64 16} | |
!10 = !{!"_ZTS7box_str", !11, i64 0, !11, i64 4, !11, i64 8, !11, i64 12, !14, i64 16, !11, i64 24, !12, i64 32} | |
!11 = !{!"int", !12, i64 0} | |
!12 = !{!"omnipotent char", !13, i64 0} | |
!13 = !{!"Simple C++ TBAA"} | |
!14 = !{!"long", !12, i64 0} | |
; !15: !tbaa.struct layout for the 16-byte memcpy calls: four 4-byte float fields at 0, 4, 8, 12.
!15 = !{i64 0, i64 4, !16, i64 4, i64 4, !16, i64 8, i64 4, !16, i64 12, i64 4, !16} | |
!16 = !{!17, !17, i64 0} | |
!17 = !{!"float", !12, i64 0} | |
; !18: loop metadata on the rA remainder-loop branch; !19 disables further unrolling of it.
!18 = distinct !{!18, !19} | |
!19 = !{!"llvm.loop.unroll.disable"} | |
!20 = !{!10, !11, i64 24} | |
!21 = !{!22, !11, i64 12} | |
!22 = !{!"_ZTS7nei_str", !11, i64 0, !11, i64 4, !11, i64 8, !11, i64 12, !14, i64 16} | |
!23 = !{!24, !17, i64 0} | |
!24 = !{!"_ZTS11FOUR_VECTOR", !17, i64 0, !17, i64 4, !17, i64 8, !17, i64 12} | |
!25 = !{!24, !17, i64 4} | |
!26 = !{!24, !17, i64 8} | |
!27 = !{!24, !17, i64 12} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment