Skip to content

Instantly share code, notes, and snippets.

@Commaster
Created August 10, 2018 11:51
Show Gist options
  • Save Commaster/c8cb1928c22a2b77e358c48c01eb5163 to your computer and use it in GitHub Desktop.
Save Commaster/c8cb1928c22a2b77e358c48c01eb5163 to your computer and use it in GitHub Desktop.
[MCE] test case
; Module header for the reproducer: target description, the named struct
; types used by the kernel below, and three comdat-backed global arrays.
;
; Data layout / triple: little-endian ("e"), ELF name mangling ("m:e"),
; x86-64 Linux.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; FOUR_VECTOR: four floats (16 bytes); the kernel addresses fields 0..3.
%struct.FOUR_VECTOR = type { float, float, float, float }
; par_str: a single float; the kernel extracts it as field 0.
%struct.par_str = type { float }
; dim_str: the kernel only reads field 4 (the first i64).
%struct.dim_str = type { i32, i32, i32, i32, i64, i64, i64, i64, i64 }
; box_str: the kernel reads field 4 (i64), field 5 (i32) and, through field 6,
; field 3 (i32) of individual nei_str entries.
%struct.box_str = type { i32, i32, i32, i32, i64, i32, [26 x %struct.nei_str] }
%struct.nei_str = type { i32, i32, i32, i32, i64 }
; One "any" comdat per global defined below (same names as the globals).
$_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared = comdat any
$_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared = comdat any
$_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9qB_shared = comdat any
; Zero-initialised, 16-byte-aligned, linkonce_odr staging arrays:
;   rA_shared / rB_shared : [100 x FOUR_VECTOR]
;   qB_shared             : [100 x float]
; Each is tagged with !pacxx.as.shared (operand !0, an empty node).
; NOTE(review): the !pacxx.as.shared attachment appears twice on every global;
; unclear from this file whether that is intentional or part of what the
; test case is meant to exercise - confirm before de-duplicating.
@_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared = linkonce_odr local_unnamed_addr global [100 x %struct.FOUR_VECTOR] zeroinitializer, comdat, align 16, !pacxx.as.shared !0, !pacxx.as.shared !0
@_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared = linkonce_odr local_unnamed_addr global [100 x %struct.FOUR_VECTOR] zeroinitializer, comdat, align 16, !pacxx.as.shared !0, !pacxx.as.shared !0
@_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9qB_shared = linkonce_odr local_unnamed_addr global [100 x float] zeroinitializer, comdat, align 16, !pacxx.as.shared !0, !pacxx.as.shared !0
; Kernel entry point (attached metadata: !pacxx.kernel !8, "genericKernel").
;
; Parameters:
;   %callable.coerce0  %struct.par_str by value; its single float is extracted.
;   %callable.coerce1  %struct.dim_str by value; only field 4 (i64) is extracted.
;   %callable.coerce2  %struct.box_str*, readonly; indexed by the ctaid.x value
;                      and by indices loaded from its own nei_str entries.
;   %callable.coerce3  %struct.FOUR_VECTOR*, readonly; source of the 16-byte
;                      copies into rA_shared / rB_shared.
;   %callable.coerce4  float*, readonly; source of the values stored to qB_shared.
;   %callable.coerce5  %struct.FOUR_VECTOR*; read-modify-written (load/fadd/store).
;   %name              i8**, readnone; not referenced in the body.
; Returns: void.
;
; Outline of the control flow:
;   1. entry:       leave immediately unless dim.field4 > sext(ctaid.x).
;   2. while.body*: starting at index tid.x and stepping by 128, copy
;                   FOUR_VECTORs from %callable.coerce3 into rA_shared
;                   (main loop does 4 copies per iteration, preceded by a
;                   remainder loop).
;   3. for.body:    for k = 0 .. box.field5 (inclusive):
;                     a. pick an i64 "offset" pointer (own box for k == 0,
;                        otherwise the box named by neighbour k-1),
;                     b. copy into rB_shared / qB_shared (stride 128, main loop
;                        does 2 copies per iteration, at most one remainder copy),
;                     c. barrier, then for each index i (stride 128 from tid.x)
;                        loop j = 0..99 accumulating into %callable.coerce5,
;                     d. barrier.
;   4. ret void.
;
; The recurring constants -28, -156 and -412 are 100-128, 100-256 and 100-512:
; "iv < -28" is the same test as "iv + 128 < 100", and so on.
;
; Function Attrs: noinline nounwind uwtable
define void @"_ZN5pacxx2v213genericKernelIZ25kernel_gpu_opencl_wrapper7par_str7dim_strP7box_strP11FOUR_VECTORPfS7_E3$_0EEvT_PPKc"(%struct.par_str %callable.coerce0, %struct.dim_str %callable.coerce1, %struct.box_str* nocapture readonly %callable.coerce2, %struct.FOUR_VECTOR* nocapture readonly %callable.coerce3, float* nocapture readonly %callable.coerce4, %struct.FOUR_VECTOR* nocapture %callable.coerce5, i8** nocapture readnone %name) local_unnamed_addr #1 !pacxx.kernel !8 {
entry:
; Unpack the two by-value aggregates, then guard on the ctaid.x value.
%callable.coerce0.fca.0.extract = extractvalue %struct.par_str %callable.coerce0, 0
%callable.coerce1.fca.4.extract = extractvalue %struct.dim_str %callable.coerce1, 4
; %0: result of the ctaid.x intrinsic (presumably the block/work-group index - confirm).
%0 = call i32 @llvm.pacxx.read.ctaid.x() #6
%conv.i.i = sext i32 %0 to i64
; Proceed only when dim.field4 > ctaid.x (signed); otherwise return.
%cmp.i.i = icmp sgt i64 %callable.coerce1.fca.4.extract, %conv.i.i
br i1 %cmp.i.i, label %if.then.i.i, label %"_ZN5pacxx2v210kernelBodyIRZ25kernel_gpu_opencl_wrapper7par_str7dim_strP7box_strP11FOUR_VECTORPfS7_E3$_0EEvOT_.exit"
if.then.i.i: ; preds = %entry
; %mul3.i.i = par * (par * 2.0); used later as the scale inside the expf argument.
%mul.i.i = fmul float %callable.coerce0.fca.0.extract, 2.000000e+00
%mul3.i.i = fmul float %callable.coerce0.fca.0.extract, %mul.i.i
; %1 = box[ctaid.x].field4 (i64).
%offset.i.i = getelementptr inbounds %struct.box_str, %struct.box_str* %callable.coerce2, i64 %conv.i.i, i32 4
%1 = load i64, i64* %offset.i.i, align 8, !tbaa !9
; %2: result of the tid.x intrinsic (presumably the thread index within the block - confirm).
%2 = call i32 @llvm.pacxx.read.tid.x() #6
; Only enter the rA_shared copy loop when tid.x < 100 (the array length).
; This i1 is reused at if.end.i.i to guard the rB/qB copy loop as well.
%cmp5234.i.i = icmp slt i32 %2, 100
br i1 %cmp5234.i.i, label %while.body.lr.ph.i.i, label %while.end.i.i
while.body.lr.ph.i.i: ; preds = %if.then.i.i
; Loop preheader: widen tid.x, sign-extend the low 32 bits of %1, and derive
; the iteration bookkeeping for the copy loop below.
%3 = sext i32 %2 to i64
; shl 32 / ashr 32 == sign-extension of the low 32 bits of %1.
%sext250.i.i = shl i64 %1, 32
%4 = ashr exact i64 %sext250.i.i, 32
; %7 = 127 - tid + max(tid, -28); %9 = (%7 >> 7) + 1.
%5 = icmp sgt i64 %3, -28
%smax251.i.i = select i1 %5, i64 %3, i64 -28
%6 = sub nsw i64 127, %3
%7 = add nsw i64 %6, %smax251.i.i
%8 = lshr i64 %7, 7
%9 = add nuw nsw i64 %8, 1
; %xtraiter = %9 mod 4: number of iterations the remainder loop must run
; before the 4-copies-per-iteration main loop takes over.
%xtraiter252.i.i = and i64 %9, 3
%lcmp.mod253.i.i = icmp eq i64 %xtraiter252.i.i, 0
br i1 %lcmp.mod253.i.i, label %while.body.prol.loopexit.i.i, label %while.body.prol.preheader.i.i
while.body.prol.preheader.i.i: ; preds = %while.body.lr.ph.i.i
br label %while.body.prol.i.i
while.body.prol.i.i: ; preds = %while.body.prol.i.i.while.body.prol.i.i_crit_edge, %while.body.prol.preheader.i.i
; Remainder loop: one copy per iteration, index advances by 128, counter
; %prol.iter counts down from %xtraiter to 0.
%indvars.iv245.prol.i.i = phi i64 [ %3, %while.body.prol.preheader.i.i ], [ %indvars.iv.next246.prol.i.i, %while.body.prol.i.i.while.body.prol.i.i_crit_edge ]
%prol.iter.i.i = phi i64 [ %xtraiter252.i.i, %while.body.prol.preheader.i.i ], [ %prol.iter.sub.i.i, %while.body.prol.i.i.while.body.prol.i.i_crit_edge ]
; rA_shared[iv] = coerce3[iv + %4]  (16-byte memcpy, alignment argument 4, not volatile).
%10 = add nsw i64 %indvars.iv245.prol.i.i, %4
%arrayidx7.prol.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %10
%arrayidx9.prol.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv245.prol.i.i
%11 = bitcast %struct.FOUR_VECTOR* %arrayidx9.prol.i.i to i8*
%12 = bitcast %struct.FOUR_VECTOR* %arrayidx7.prol.i.i to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %11, i8* %12, i64 16, i32 4, i1 false), !tbaa.struct !15
%indvars.iv.next246.prol.i.i = add nsw i64 %indvars.iv245.prol.i.i, 128
%prol.iter.sub.i.i = add i64 %prol.iter.i.i, -1
%prol.iter.cmp.i.i = icmp eq i64 %prol.iter.sub.i.i, 0
; Back-edge carries loop metadata !18 (llvm.loop.unroll.disable).
br i1 %prol.iter.cmp.i.i, label %while.body.prol.loopexit.i.i, label %while.body.prol.i.i.while.body.prol.i.i_crit_edge, !llvm.loop !18
while.body.prol.i.i.while.body.prol.i.i_crit_edge: ; preds = %while.body.prol.i.i
br label %while.body.prol.i.i
while.body.prol.loopexit.i.i: ; preds = %while.body.prol.i.i, %while.body.lr.ph.i.i
; %indvars.iv245.unr: index at which the main loop starts (tid if the
; remainder loop was skipped, otherwise where it stopped).
%indvars.iv245.unr.i.i = phi i64 [ %3, %while.body.lr.ph.i.i ], [ %indvars.iv.next246.prol.i.i, %while.body.prol.i.i ]
; %7 < 384 (unsigned) means %8 < 3, i.e. the main loop is not entered.
%13 = icmp ult i64 %7, 384
br i1 %13, label %while.end.i.i, label %while.body.lr.ph.new.i.i
while.body.lr.ph.new.i.i: ; preds = %while.body.prol.loopexit.i.i
br label %while.body.i.i
while.body.i.i: ; preds = %while.body.i.i.while.body.i.i_crit_edge, %while.body.lr.ph.new.i.i
; Main rA_shared copy loop: four copies per iteration at iv, iv+128, iv+256
; and iv+384; iv advances by 512 on the back-edge.
%indvars.iv245.i.i = phi i64 [ %indvars.iv245.unr.i.i, %while.body.lr.ph.new.i.i ], [ %indvars.iv.next246.3.i.i, %while.body.i.i.while.body.i.i_crit_edge ]
; copy #0: rA_shared[iv] = coerce3[iv + %4]
%14 = add nsw i64 %indvars.iv245.i.i, %4
%arrayidx7.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %14
%arrayidx9.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv245.i.i
%15 = bitcast %struct.FOUR_VECTOR* %arrayidx9.i.i to i8*
%16 = bitcast %struct.FOUR_VECTOR* %arrayidx7.i.i to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %15, i8* %16, i64 16, i32 4, i1 false), !tbaa.struct !15
; copy #1: index iv + 128
%indvars.iv.next246.i.i = add nsw i64 %indvars.iv245.i.i, 128
%17 = add nsw i64 %indvars.iv.next246.i.i, %4
%arrayidx7.1.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %17
%arrayidx9.1.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv.next246.i.i
%18 = bitcast %struct.FOUR_VECTOR* %arrayidx9.1.i.i to i8*
%19 = bitcast %struct.FOUR_VECTOR* %arrayidx7.1.i.i to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %18, i8* %19, i64 16, i32 4, i1 false), !tbaa.struct !15
; copy #2: index iv + 256
%indvars.iv.next246.1.i.i = add nsw i64 %indvars.iv245.i.i, 256
%20 = add nsw i64 %indvars.iv.next246.1.i.i, %4
%arrayidx7.2.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %20
%arrayidx9.2.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv.next246.1.i.i
%21 = bitcast %struct.FOUR_VECTOR* %arrayidx9.2.i.i to i8*
%22 = bitcast %struct.FOUR_VECTOR* %arrayidx7.2.i.i to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %21, i8* %22, i64 16, i32 4, i1 false), !tbaa.struct !15
; copy #3: index iv + 384
%indvars.iv.next246.2.i.i = add nsw i64 %indvars.iv245.i.i, 384
%23 = add nsw i64 %indvars.iv.next246.2.i.i, %4
%arrayidx7.3.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %23
%arrayidx9.3.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv.next246.2.i.i
%24 = bitcast %struct.FOUR_VECTOR* %arrayidx9.3.i.i to i8*
%25 = bitcast %struct.FOUR_VECTOR* %arrayidx7.3.i.i to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %24, i8* %25, i64 16, i32 4, i1 false), !tbaa.struct !15
; Continue while iv < -412, i.e. while iv + 512 < 100.
%cmp5.3.i.i = icmp slt i64 %indvars.iv245.i.i, -412
br i1 %cmp5.3.i.i, label %while.body.i.i.while.body.i.i_crit_edge, label %while.end.i.i
while.body.i.i.while.body.i.i_crit_edge: ; preds = %while.body.i.i
%indvars.iv.next246.3.i.i = add nsw i64 %indvars.iv245.i.i, 512
br label %while.body.i.i
while.end.i.i: ; preds = %while.body.i.i, %while.body.prol.loopexit.i.i, %if.then.i.i
; Barrier after the rA_shared copy phase (intrinsic is declared convergent;
; presumably a block-wide synchronisation - confirm against PACXX).
call void @llvm.pacxx.barrier0() #6
; %26 = box[ctaid.x].field5 (i32). A negative value skips the outer loop.
%nn.i.i = getelementptr inbounds %struct.box_str, %struct.box_str* %callable.coerce2, i64 %conv.i.i, i32 5
%26 = load i32, i32* %nn.i.i, align 8, !tbaa !20
%cmp14232.i.i = icmp slt i32 %26, 0
br i1 %cmp14232.i.i, label %"_ZN5pacxx2v210kernelBodyIRZ25kernel_gpu_opencl_wrapper7par_str7dim_strP7box_strP11FOUR_VECTORPfS7_E3$_0EEvOT_.exit", label %for.body.lr.ph.i.i
for.body.lr.ph.i.i: ; preds = %while.end.i.i
; Outer-loop preheader. Recomputes the same quantities as while.body.lr.ph
; (%27 == %3, %28 == %4, %31 == %7) because that block does not dominate
; this one, and hoists everything the per-k copy remainder step needs.
%27 = sext i32 %2 to i64
%sext.i.i = shl i64 %1, 32
%28 = ashr exact i64 %sext.i.i, 32
%29 = icmp sgt i64 %27, -28
%smax.i.i = select i1 %29, i64 %27, i64 -28
%30 = sub nsw i64 127, %27
%31 = add nsw i64 %30, %smax.i.i
; %lcmp.mod is true iff bit 7 of %31 is clear (low byte, viewed as signed, is >= 0);
; it decides whether the single remainder copy runs before the 2-per-iteration loop.
%32 = trunc i64 %31 to i8
%lcmp.mod.i.i = icmp sgt i8 %32, -1
; Destinations for the remainder copy: &rB_shared[tid] and &qB_shared[tid].
%arrayidx32.prol.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %27
%33 = bitcast %struct.FOUR_VECTOR* %arrayidx32.prol.i.i to i8*
%arrayidx37.prol.i.i = getelementptr inbounds [100 x float], [100 x float]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9qB_shared, i64 0, i64 %27
%indvars.iv.next.prol.i.i = add nsw i64 %27, 128
; %34: when true (%31 < 128 unsigned) the 2-per-iteration main loop is skipped.
%34 = icmp ult i64 %31, 128
br label %for.body.i.i
for.body.i.i: ; preds = %while.end140.i.i.for.body.i.i_crit_edge, %for.body.lr.ph.i.i
; Outer loop over k (%indvars.iv242), starting at 0.
%indvars.iv242.i.i = phi i64 [ %indvars.iv.next243.i.i, %while.end140.i.i.for.body.i.i_crit_edge ], [ 0, %for.body.lr.ph.i.i ]
; k == 0 uses this block's own offset pointer; k > 0 goes through a nei_str entry.
%cmp15.i.i = icmp eq i64 %indvars.iv242.i.i, 0
br i1 %cmp15.i.i, label %if.end.i.i, label %if.else.i.i
if.else.i.i: ; preds = %for.body.i.i
; %36 = box[ctaid.x].field6[k-1].field3 (i32); the offset pointer becomes
; &box[%36].field4.
%35 = add nsw i64 %indvars.iv242.i.i, -1
%number.i.i = getelementptr inbounds %struct.box_str, %struct.box_str* %callable.coerce2, i64 %conv.i.i, i32 6, i64 %35, i32 3
%36 = load i32, i32* %number.i.i, align 4, !tbaa !21
%.pre.i.i = sext i32 %36 to i64
%.pre248.i.i = getelementptr inbounds %struct.box_str, %struct.box_str* %callable.coerce2, i64 %.pre.i.i, i32 4
br label %if.end.i.i
if.end.i.i: ; preds = %if.else.i.i, %for.body.i.i
; Selected pointer to the i64 offset field for this k.
%offset23.pre-phi.i.i = phi i64* [ %.pre248.i.i, %if.else.i.i ], [ %offset.i.i, %for.body.i.i ]
; Same tid.x < 100 guard as for the rA_shared copy loop.
br i1 %cmp5234.i.i, label %while.body27.lr.ph.i.i, label %while.end39.i.i
while.body27.lr.ph.i.i: ; preds = %if.end.i.i
; %38 = sign-extended low 32 bits of the selected offset.
%37 = load i64, i64* %offset23.pre-phi.i.i, align 8, !tbaa !9
%sext249.i.i = shl i64 %37, 32
%38 = ashr exact i64 %sext249.i.i, 32
br i1 %lcmp.mod.i.i, label %while.body27.prol.i.i, label %while.body27.prol.loopexit.i.i
while.body27.prol.i.i: ; preds = %while.body27.lr.ph.i.i
; Single remainder copy at index tid:
;   rB_shared[tid] = coerce3[%38 + tid]; qB_shared[tid] = coerce4[%38 + tid].
%39 = add nsw i64 %38, %27
%arrayidx30.prol.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %39
%40 = bitcast %struct.FOUR_VECTOR* %arrayidx30.prol.i.i to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %33, i8* %40, i64 16, i32 4, i1 false), !tbaa.struct !15
%arrayidx35.prol.i.i = getelementptr inbounds float, float* %callable.coerce4, i64 %39
; Note: the float load/store pairs in this copy phase carry no align or !tbaa.
%41 = load float, float* %arrayidx35.prol.i.i
store float %41, float* %arrayidx37.prol.i.i
br label %while.body27.prol.loopexit.i.i
while.body27.prol.loopexit.i.i: ; preds = %while.body27.prol.i.i, %while.body27.lr.ph.i.i
; Start index for the main loop: tid + 128 after the remainder copy, else tid.
%indvars.iv.unr.ph.i.i = phi i64 [ %indvars.iv.next.prol.i.i, %while.body27.prol.i.i ], [ %27, %while.body27.lr.ph.i.i ]
br i1 %34, label %while.end39.i.i, label %while.body27.lr.ph.new.i.i
while.body27.lr.ph.new.i.i: ; preds = %while.body27.prol.loopexit.i.i
br label %while.body27.i.i
while.body27.i.i: ; preds = %while.body27.i.i.while.body27.i.i_crit_edge, %while.body27.lr.ph.new.i.i
; Main rB_shared/qB_shared copy loop: two copies per iteration (iv and
; iv + 128); iv advances by 256 on the back-edge.
%indvars.iv.i.i = phi i64 [ %indvars.iv.unr.ph.i.i, %while.body27.lr.ph.new.i.i ], [ %indvars.iv.next.1.i.i, %while.body27.i.i.while.body27.i.i_crit_edge ]
; copy #0: rB_shared[iv] = coerce3[iv + %38]; qB_shared[iv] = coerce4[iv + %38]
%42 = add nsw i64 %indvars.iv.i.i, %38
%arrayidx30.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %42
%arrayidx32.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %indvars.iv.i.i
%43 = bitcast %struct.FOUR_VECTOR* %arrayidx32.i.i to i8*
%44 = bitcast %struct.FOUR_VECTOR* %arrayidx30.i.i to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %43, i8* %44, i64 16, i32 4, i1 false), !tbaa.struct !15
%arrayidx35.i.i = getelementptr inbounds float, float* %callable.coerce4, i64 %42
%45 = load float, float* %arrayidx35.i.i
%arrayidx37.i.i = getelementptr inbounds [100 x float], [100 x float]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9qB_shared, i64 0, i64 %indvars.iv.i.i
store float %45, float* %arrayidx37.i.i
; copy #1: same pair of copies at index iv + 128
%indvars.iv.next.i.i = add nsw i64 %indvars.iv.i.i, 128
%46 = add nsw i64 %indvars.iv.next.i.i, %38
%arrayidx30.1.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce3, i64 %46
%arrayidx32.1.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %indvars.iv.next.i.i
%47 = bitcast %struct.FOUR_VECTOR* %arrayidx32.1.i.i to i8*
%48 = bitcast %struct.FOUR_VECTOR* %arrayidx30.1.i.i to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %47, i8* %48, i64 16, i32 4, i1 false), !tbaa.struct !15
%arrayidx35.1.i.i = getelementptr inbounds float, float* %callable.coerce4, i64 %46
%49 = load float, float* %arrayidx35.1.i.i
%arrayidx37.1.i.i = getelementptr inbounds [100 x float], [100 x float]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9qB_shared, i64 0, i64 %indvars.iv.next.i.i
store float %49, float* %arrayidx37.1.i.i
; Continue while iv < -156, i.e. while iv + 256 < 100.
%cmp26.1.i.i = icmp slt i64 %indvars.iv.i.i, -156
br i1 %cmp26.1.i.i, label %while.body27.i.i.while.body27.i.i_crit_edge, label %while.end39.i.i
while.body27.i.i.while.body27.i.i_crit_edge: ; preds = %while.body27.i.i
%indvars.iv.next.1.i.i = add nsw i64 %indvars.iv.i.i, 256
br label %while.body27.i.i
while.end39.i.i: ; preds = %while.body27.i.i, %while.body27.prol.loopexit.i.i, %if.end.i.i
; %cmp5234.pr re-materialises the tid.x < 100 guard as a phi: false when the
; copy phase was skipped from if.end, true on both paths that went through it.
%cmp5234.pr.i.i = phi i1 [ false, %if.end.i.i ], [ true, %while.body27.prol.loopexit.i.i ], [ true, %while.body27.i.i ]
; Barrier between the copy phase and the compute phase.
call void @llvm.pacxx.barrier0() #6
br i1 %cmp5234.pr.i.i, label %while.body42.preheader.i.i, label %while.end140.i.i
while.body42.preheader.i.i: ; preds = %while.end39.i.i
br label %while.body42.i.i
while.body42.i.i: ; preds = %for.end.i.i.while.body42.i.i_crit_edge, %while.body42.preheader.i.i
; Compute loop over i (%indvars.iv239), starting at tid and stepping by 128.
; Hoists the per-i addresses: fields 0..2 of rA_shared[i] and fields 0..3 of
; coerce5[i + %28] (the accumulation target).
%indvars.iv239.i.i = phi i64 [ %indvars.iv.next240.i.i, %for.end.i.i.while.body42.i.i_crit_edge ], [ %27, %while.body42.preheader.i.i ]
%x.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv239.i.i, i32 0
%y.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv239.i.i, i32 1
%z.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rA_shared, i64 0, i64 %indvars.iv239.i.i, i32 2
%50 = add nsw i64 %indvars.iv239.i.i, %28
%x113.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce5, i64 %50, i32 0
%y121.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce5, i64 %50, i32 1
%z129.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce5, i64 %50, i32 2
%w137.i.i = getelementptr inbounds %struct.FOUR_VECTOR, %struct.FOUR_VECTOR* %callable.coerce5, i64 %50, i32 3
br label %for.body45.i.i
for.body45.i.i: ; preds = %for.body45.i.i.for.body45.i.i_crit_edge, %while.body42.i.i
; Inner loop over j (%indvars.iv237) = 0 .. 99.
; Below, A = rA_shared[i], B = rB_shared[j], q = qB_shared[j], fN = field N.
%indvars.iv237.i.i = phi i64 [ 0, %while.body42.i.i ], [ %indvars.iv.next238.i.i, %for.body45.i.i.for.body45.i.i_crit_edge ]
; sub73 = (A.f0 + B.f0) - (A.f0*B.f0 + A.f1*B.f1 + A.f2*B.f2)
%51 = load float, float* %x.i.i, align 16, !tbaa !23
%x50.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %indvars.iv237.i.i, i32 0
%52 = load float, float* %x50.i.i, align 16, !tbaa !23
%add51.i.i = fadd float %51, %52
%mul58.i.i = fmul float %51, %52
%53 = load float, float* %y.i.i, align 4, !tbaa !25
%y63.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %indvars.iv237.i.i, i32 1
%54 = load float, float* %y63.i.i, align 4, !tbaa !25
%mul64.i.i = fmul float %53, %54
%add65.i.i = fadd float %mul58.i.i, %mul64.i.i
%55 = load float, float* %z.i.i, align 8, !tbaa !26
%z70.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %indvars.iv237.i.i, i32 2
%56 = load float, float* %z70.i.i, align 8, !tbaa !26
%mul71.i.i = fmul float %55, %56
%add72.i.i = fadd float %add65.i.i, %mul71.i.i
%sub73.i.i = fsub float %add51.i.i, %add72.i.i
; e = expf(-(mul3 * sub73)); "fsub -0.0, x" is the negation idiom.
%mul74.i.i = fmul float %mul3.i.i, %sub73.i.i
%sub75.i.i = fsub float -0.000000e+00, %mul74.i.i
%call76.i.i = call float @expf(float %sub75.i.i) #6
; mul77 = 2 * e
%mul77.i.i = fmul float %call76.i.i, 2.000000e+00
; Three scaled differences. Note the field pairing as written in the IR:
;   mul87  = mul77 * (A.f0 - B.f1)
;   mul97  = mul77 * (A.f1 - B.f2)
;   mul106 = mul77 * (A.f2 - B.f3)
; NOTE(review): A's field N is paired with B's field N+1 here, unlike the
; same-index products above; whether that mirrors the original source cannot
; be told from this file.
%57 = load float, float* %x.i.i, align 16, !tbaa !23
%58 = load float, float* %y63.i.i, align 4, !tbaa !25
%sub84.i.i = fsub float %57, %58
%mul87.i.i = fmul float %mul77.i.i, %sub84.i.i
%59 = load float, float* %y.i.i, align 4, !tbaa !25
%60 = load float, float* %z70.i.i, align 8, !tbaa !26
%sub94.i.i = fsub float %59, %60
%mul97.i.i = fmul float %mul77.i.i, %sub94.i.i
%61 = load float, float* %z.i.i, align 8, !tbaa !26
%w.i.i = getelementptr inbounds [100 x %struct.FOUR_VECTOR], [100 x %struct.FOUR_VECTOR]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9rB_shared, i64 0, i64 %indvars.iv237.i.i, i32 3
%62 = load float, float* %w.i.i, align 4, !tbaa !27
%sub103.i.i = fsub float %61, %62
%mul106.i.i = fmul float %mul77.i.i, %sub103.i.i
; Accumulate into coerce5[i + %28]; q is reloaded before each of the four updates:
;   .f0 += e      * q
;   .f1 += mul87  * q
;   .f2 += mul97  * q
;   .f3 += mul106 * q
%arrayidx108.i.i = getelementptr inbounds [100 x float], [100 x float]* @_ZZ6kernelIN5pacxx2v25rangeEEvRT_7par_str7dim_strP7box_strP11FOUR_VECTORPfSA_E9qB_shared, i64 0, i64 %indvars.iv237.i.i
%63 = load float, float* %arrayidx108.i.i, align 4, !tbaa !16
%mul109.i.i = fmul float %call76.i.i, %63
%64 = load float, float* %x113.i.i, align 4, !tbaa !23
%add114.i.i = fadd float %64, %mul109.i.i
store float %add114.i.i, float* %x113.i.i, align 4, !tbaa !23
%65 = load float, float* %arrayidx108.i.i, align 4, !tbaa !16
%mul117.i.i = fmul float %mul87.i.i, %65
%66 = load float, float* %y121.i.i, align 4, !tbaa !25
%add122.i.i = fadd float %66, %mul117.i.i
store float %add122.i.i, float* %y121.i.i, align 4, !tbaa !25
%67 = load float, float* %arrayidx108.i.i, align 4, !tbaa !16
%mul125.i.i = fmul float %mul97.i.i, %67
%68 = load float, float* %z129.i.i, align 4, !tbaa !26
%add130.i.i = fadd float %68, %mul125.i.i
store float %add130.i.i, float* %z129.i.i, align 4, !tbaa !26
%69 = load float, float* %arrayidx108.i.i, align 4, !tbaa !16
%mul133.i.i = fmul float %mul106.i.i, %69
%70 = load float, float* %w137.i.i, align 4, !tbaa !27
%add138.i.i = fadd float %70, %mul133.i.i
store float %add138.i.i, float* %w137.i.i, align 4, !tbaa !27
; j++ ; leave the inner loop once j reaches 100.
%indvars.iv.next238.i.i = add nuw nsw i64 %indvars.iv237.i.i, 1
%exitcond.i.i = icmp eq i64 %indvars.iv.next238.i.i, 100
br i1 %exitcond.i.i, label %for.end.i.i, label %for.body45.i.i.for.body45.i.i_crit_edge
for.body45.i.i.for.body45.i.i_crit_edge: ; preds = %for.body45.i.i
br label %for.body45.i.i
for.end.i.i: ; preds = %for.body45.i.i
; Continue the i loop while i < -28, i.e. while i + 128 < 100.
%cmp41.i.i = icmp slt i64 %indvars.iv239.i.i, -28
br i1 %cmp41.i.i, label %for.end.i.i.while.body42.i.i_crit_edge, label %while.end140.i.i
for.end.i.i.while.body42.i.i_crit_edge: ; preds = %for.end.i.i
%indvars.iv.next240.i.i = add nsw i64 %indvars.iv239.i.i, 128
br label %while.body42.i.i
while.end140.i.i: ; preds = %for.end.i.i, %while.end39.i.i
; Barrier at the end of each k iteration, before rB_shared/qB_shared are
; overwritten by the next one.
call void @llvm.pacxx.barrier0() #6
; Reload box[ctaid.x].field5 and continue while k < that value (k is the
; pre-increment index, so the body runs for k = 0 .. field5 inclusive).
%71 = load i32, i32* %nn.i.i, align 8, !tbaa !20
%72 = sext i32 %71 to i64
%cmp14.i.i = icmp slt i64 %indvars.iv242.i.i, %72
br i1 %cmp14.i.i, label %while.end140.i.i.for.body.i.i_crit_edge, label %"_ZN5pacxx2v210kernelBodyIRZ25kernel_gpu_opencl_wrapper7par_str7dim_strP7box_strP11FOUR_VECTORPfS7_E3$_0EEvOT_.exit"
while.end140.i.i.for.body.i.i_crit_edge: ; preds = %while.end140.i.i
%indvars.iv.next243.i.i = add nuw nsw i64 %indvars.iv242.i.i, 1
br label %for.body.i.i
"_ZN5pacxx2v210kernelBodyIRZ25kernel_gpu_opencl_wrapper7par_str7dim_strP7box_strP11FOUR_VECTORPfS7_E3$_0EEvOT_.exit": ; preds = %while.end140.i.i, %while.end.i.i, %entry
; Common exit for all three early-out / loop-exit paths.
ret void
; uselistorder directives
; Pins the in-memory use-list order of the shared zeroinitializer constant;
; it affects use-list iteration order only, not the semantics of the IR.
uselistorder [100 x %struct.FOUR_VECTOR] zeroinitializer, { 1, 0 }
}
; External declarations used by the kernel. None has a body in this module.
;
; Returns an i32 that the kernel sign-extends and uses to index the box array
; (presumably the block/work-group index in x - confirm against PACXX).
; Function Attrs: alwaysinline nounwind readnone
declare i32 @llvm.pacxx.read.ctaid.x() #2
; Returns an i32 that the kernel uses as the starting element index of its
; stride-128 loops (presumably the thread index in x - confirm against PACXX).
; Function Attrs: alwaysinline nounwind readnone
declare i32 @llvm.pacxx.read.tid.x() #2
; Five-operand memcpy form: (dest, src, length, alignment, isvolatile).
; Function Attrs: alwaysinline argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #3
; Called between the copy and compute phases of the kernel; declared convergent.
; Function Attrs: alwaysinline convergent nounwind
declare void @llvm.pacxx.barrier0() #4
; libm single-precision exponential; called once per inner-loop iteration.
; Function Attrs: alwaysinline nounwind
declare float @expf(float) local_unnamed_addr #5
; Attribute groups. #0-#5 all pin "target-cpu"="haswell" with the same
; "target-features" string; they differ only in the leading function attributes.
;
; #0: target-cpu/features only. Not referenced by any definition or
;     declaration visible in this listing.
attributes #0 = { "target-cpu"="haswell" "target-features"="+sse2,+cx16,-tbm,-avx512ifma,-gfni,-sha,-fma4,-vpclmulqdq,-prfchw,+bmi2,-xsavec,+fsgsbase,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3" }
; #1: the kernel definition (noinline nounwind uwtable + codegen string attributes).
attributes #1 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+sse2,+cx16,-tbm,-avx512ifma,-gfni,-sha,-fma4,-vpclmulqdq,-prfchw,+bmi2,-xsavec,+fsgsbase,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #2: @llvm.pacxx.read.ctaid.x and @llvm.pacxx.read.tid.x (readnone).
attributes #2 = { alwaysinline nounwind readnone "target-cpu"="haswell" "target-features"="+sse2,+cx16,-tbm,-avx512ifma,-gfni,-sha,-fma4,-vpclmulqdq,-prfchw,+bmi2,-xsavec,+fsgsbase,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3" }
; #3: @llvm.memcpy.p0i8.p0i8.i64 (argmemonly).
attributes #3 = { alwaysinline argmemonly nounwind "target-cpu"="haswell" "target-features"="+sse2,+cx16,-tbm,-avx512ifma,-gfni,-sha,-fma4,-vpclmulqdq,-prfchw,+bmi2,-xsavec,+fsgsbase,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3" }
; #4: @llvm.pacxx.barrier0 (convergent).
attributes #4 = { alwaysinline convergent nounwind "target-cpu"="haswell" "target-features"="+sse2,+cx16,-tbm,-avx512ifma,-gfni,-sha,-fma4,-vpclmulqdq,-prfchw,+bmi2,-xsavec,+fsgsbase,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3" }
; #5: @expf declaration.
attributes #5 = { alwaysinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+sse2,+cx16,-tbm,-avx512ifma,-gfni,-sha,-fma4,-vpclmulqdq,-prfchw,+bmi2,-xsavec,+fsgsbase,+popcnt,+aes,-avx512bitalg,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-xop,-rdseed,-ibt,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,-rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,-adx,-avx512pf,+sse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #6: call-site attribute group used on every call inside the kernel.
attributes #6 = { nounwind }
; Named module-level metadata.
; NOTE(review): !llvm.ident lists !2 twice; left as found.
!llvm.ident = !{!1, !2, !2}
; List of kernel functions: the single entry !3 points at the kernel above.
!pacxx.kernel = !{!3}
!llvm.module.flags = !{!4}
; !0: empty node, used as the operand of every !pacxx.as.shared attachment.
!0 = !{}
!1 = !{!"PACXX"}
!2 = !{!"clang version 6.0.1"}
!3 = !{void (%struct.par_str, %struct.dim_str, %struct.box_str*, %struct.FOUR_VECTOR*, float*, %struct.FOUR_VECTOR*, i8**)* @"_ZN5pacxx2v213genericKernelIZ25kernel_gpu_opencl_wrapper7par_str7dim_strP7box_strP11FOUR_VECTORPfS7_E3$_0EEvT_PPKc"}
; Module flag: behaviour 1, key "wchar_size", value 4.
!4 = !{i32 1, !"wchar_size", i32 4}
; !5, !6 and !7 are not referenced by anything visible in this listing.
!5 = !{null}
!6 = distinct !{!6, !7}
!7 = !{!"llvm.loop.vectorize.enable", i1 false}
; !8: operand of the !pacxx.kernel attachment on the kernel definition.
!8 = !{!"genericKernel"}
; --- TBAA ---
; Access tags have the form !{base type, access type, offset}.
; !9: the i64 at offset 16 of box_str (field 4).
!9 = !{!10, !14, i64 16}
; !10: box_str type descriptor - ints at 0/4/8/12, long at 16, int at 24,
; and the trailing array described as "omnipotent char" at 32.
!10 = !{!"_ZTS7box_str", !11, i64 0, !11, i64 4, !11, i64 8, !11, i64 12, !14, i64 16, !11, i64 24, !12, i64 32}
!11 = !{!"int", !12, i64 0}
!12 = !{!"omnipotent char", !13, i64 0}
; !13: TBAA root.
!13 = !{!"Simple C++ TBAA"}
!14 = !{!"long", !12, i64 0}
; !15: !tbaa.struct for the 16-byte memcpys - four (offset, size, tag) triples
; describing 4-byte floats at offsets 0, 4, 8 and 12.
!15 = !{i64 0, i64 4, !16, i64 4, i64 4, !16, i64 8, i64 4, !16, i64 12, i64 4, !16}
; !16: plain float access (used for the qB_shared loads).
!16 = !{!17, !17, i64 0}
!17 = !{!"float", !12, i64 0}
; !18/!19: loop ID on the rA_shared remainder loop's back-edge; disables unrolling.
!18 = distinct !{!18, !19}
!19 = !{!"llvm.loop.unroll.disable"}
; !20: the i32 at offset 24 of box_str (field 5).
!20 = !{!10, !11, i64 24}
; !21: the i32 at offset 12 of nei_str (field 3).
!21 = !{!22, !11, i64 12}
!22 = !{!"_ZTS7nei_str", !11, i64 0, !11, i64 4, !11, i64 8, !11, i64 12, !14, i64 16}
; !23, !25, !26, !27: FOUR_VECTOR float fields at offsets 0, 4, 8 and 12.
!23 = !{!24, !17, i64 0}
!24 = !{!"_ZTS11FOUR_VECTOR", !17, i64 0, !17, i64 4, !17, i64 8, !17, i64 12}
!25 = !{!24, !17, i64 4}
!26 = !{!24, !17, i64 8}
!27 = !{!24, !17, i64 12}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment