Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@isuruf
Last active September 18, 2019 07:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save isuruf/c05160e75fb8fa7eef9119ad7902fbfd to your computer and use it in GitHub Desktop.
Save isuruf/c05160e75fb8fa7eef9119ad7902fbfd to your computer and use it in GitHub Desktop.
LLVM bug
clang-7 in1.ll -O3 -fvectorize -emit-llvm -S -o out1.ll
clang-7 in2.ll -O3 -fvectorize -emit-llvm -S -o out2.ll
--- in1.ll 2019-09-18 02:05:54.012101785 -0500
+++ in2.ll 2019-09-18 02:05:44.811944483 -0500
@@ -52,9 +52,9 @@
%mul.i = mul nsw i32 %conv.i, -128
%31 = load i64, i64* %_local_id_x, align 8, !tbaa !18, !noalias !12, !llvm.mem.parallel_loop_access !16
%conv2.i = trunc i64 %30 to i32
- %add.i = add i32 %0, -1
- %add4.i = add i32 %add.i, %mul.i
- %add5.i = sub i32 %add4.i, %conv2.i
+ %add.i = xor i32 %conv2.i, -1
+ %add4.i = add i32 %add.i, %0
+ %add5.i = add i32 %add4.i, %mul.i
%cmp.i = icmp sgt i32 %add5.i, -1
br i1 %cmp.i, label %32, label %.r_exit.i
@@ -144,9 +144,9 @@
%mul.i.i = mul nsw i32 %conv.i.i, -128
%44 = load i64, i64* %_local_id_x.i, align 8, !tbaa !18, !noalias !30, !llvm.mem.parallel_loop_access !16
%conv2.i.i = trunc i64 %43 to i32
- %add.i.i = add i32 %9, -1
- %add4.i.i = add i32 %add.i.i, %mul.i.i
- %add5.i.i = sub i32 %add4.i.i, %conv2.i.i
+ %add.i.i = xor i32 %conv2.i.i, -1
+ %add4.i.i = add i32 %add.i.i, %9
+ %add5.i.i = add i32 %add4.i.i, %mul.i.i
%cmp.i.i = icmp sgt i32 %add5.i.i, -1
br i1 %cmp.i.i, label %45, label %.r_exit.i.i
@@ -242,9 +242,9 @@
%mul.i.i = mul nsw i32 %conv.i.i, -128
%42 = load i64, i64* %_local_id_x.i, align 8, !tbaa !18, !noalias !40, !llvm.mem.parallel_loop_access !16
%conv2.i.i = trunc i64 %41 to i32
- %add.i.i = add i32 %9, -1
- %add4.i.i = add i32 %add.i.i, %mul.i.i
- %add5.i.i = sub i32 %add4.i.i, %conv2.i.i
+ %add.i.i = xor i32 %conv2.i.i, -1
+ %add4.i.i = add i32 %add.i.i, %9
+ %add5.i.i = add i32 %add4.i.i, %mul.i.i
%cmp.i.i = icmp sgt i32 %add5.i.i, -1
br i1 %cmp.i.i, label %43, label %.r_exit.i.i
; Module header for the first IR module in this paste. Its guard computation
; (add / add / sub) matches the '-' side of the diff, i.e. in1.ll.
; ModuleID = 'parallel_bc'
source_filename = "parallel_bc"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; External i64 globals for the group and local ids. None of the functions in
; this module reference them; the functions use private allocas named
; %_local_id_* instead.
@_group_id_x = external local_unnamed_addr global i64, align 8
@_group_id_y = external local_unnamed_addr global i64, align 8
@_group_id_z = external local_unnamed_addr global i64, align 8
@_local_id_x = external local_unnamed_addr global i64, align 8
@_local_id_y = external local_unnamed_addr global i64, align 8
@_local_id_z = external local_unnamed_addr global i64, align 8
; Function Attrs: noduplicate
; Empty stub: the body only returns. Attribute group #0 marks it noduplicate.
; Nothing in this module calls it.
; NOTE(review): presumably a marker that pocl's kernel compiler searches for;
; confirm against the pocl sources.
define linkonce void @pocl.barrier() #0 {
ret void
}
; Function Attrs: alwaysinline norecurse nounwind
; Kernel body wrapped in a loop over 128 values of a private local-id-x
; counter. On each iteration it copies one 64-bit element from buffer %3 to
; buffer %1 when the guard computed in the loop header is non-negative.
;
; Arguments (the first five names come from the !kernel_arg_name node, !9):
;   %0  i32      grdg_n
;   %1  double*  expr_8 (noalias; the store target)
;   %2  i32      expr_8_offset
;   %3  double*  grdg_sub_discr_dx0_dr0 (noalias, readonly; the load source)
;   %4  i32      grdg_sub_discr_dx0_dr0_offset
;   %5  pointer to a context struct; fields 3, 4 and 5 are loaded below but
;       the loaded values are never used in this function
;   %6  i64      truncated to i32 and scaled by 128 to form the base index
;                NOTE(review): presumably the group id in x - confirm
;   %7, %8 i64   not used in this function
; The entry block is the implicit block %9.
define void @_pocl_kernel_grudge_assign_0(i32, double* noalias nocapture, i32, double* noalias nocapture readonly, i32, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }*, i64, i64, i64) #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 !reqd_work_group_size !10 !pocl_generated !11 {
; Private stack slots holding the current local id in each dimension.
%_local_id_x = alloca i64
%_local_id_y = alloca i64
%_local_id_z = alloca i64
; Loads of context-struct fields 3, 4 and 5, performed twice. None of the
; results (%12, %15, %18, %21, %24, %27) has a later use.
%10 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 3
%11 = getelementptr i8*, i8** %10, i64 0
%12 = load i8*, i8** %11
%13 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 4
%14 = getelementptr i32*, i32** %13, i64 0
%15 = load i32*, i32** %14
%16 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 5
%17 = getelementptr i32, i32* %16, i64 0
%18 = load i32, i32* %17
%19 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 3
%20 = getelementptr i8*, i8** %19, i64 0
%21 = load i8*, i8** %20
%22 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 4
%23 = getelementptr i32*, i32** %22, i64 0
%24 = load i32*, i32** %23
%25 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 5
%26 = getelementptr i32, i32* %25, i64 0
%27 = load i32, i32* %26
; Zero all three local ids; x is stored twice.
store i64 0, i64* %_local_id_x, !noalias !12
store i64 0, i64* %_local_id_y, !noalias !12
store i64 0, i64* %_local_id_z, !noalias !12
store i64 0, i64* %_local_id_x, !noalias !12
br label %pregion_for_entry..i
; Loop header. Memory accesses here carry !llvm.mem.parallel_loop_access !16,
; which refers to the loop ID !17 attached to the back edge.
pregion_for_entry..i: ; preds = %.r_exit.i, %9
; %28 and %29 (z and y ids) and the second x load %31 have no later use.
%28 = load i64, i64* %_local_id_z, !noalias !12, !llvm.mem.parallel_loop_access !16
%29 = load i64, i64* %_local_id_y, !noalias !12, !llvm.mem.parallel_loop_access !16
%30 = load i64, i64* %_local_id_x, !noalias !12, !llvm.mem.parallel_loop_access !16
%conv.i = trunc i64 %6 to i32
%mul.i = mul nsw i32 %conv.i, -128
%31 = load i64, i64* %_local_id_x, align 8, !tbaa !18, !noalias !12, !llvm.mem.parallel_loop_access !16
%conv2.i = trunc i64 %30 to i32
; Guard value: (%0 - 1) + (-128 * %conv.i) - %conv2.i, using wrapping i32
; arithmetic (no nsw/nuw on these three instructions). These three lines are
; the only part that differs between the two inputs; this is the add/add/sub
; form from the '-' side of the diff.
%add.i = add i32 %0, -1
%add4.i = add i32 %add.i, %mul.i
%add5.i = sub i32 %add4.i, %conv2.i
; Signed test "guard > -1", i.e. guard >= 0. Ignoring i32 wrap-around this is
; 128 * %conv.i + %conv2.i < %0.
%cmp.i = icmp sgt i32 %add5.i, -1
br i1 %cmp.i, label %32, label %.r_exit.i
; Guarded copy: index = (%conv.i << 7) + %conv2.i; read %3[index + %4] and
; write it to %1[index + %2]. The element is moved as a raw i64 through
; bitcast pointers rather than as a double.
; <label>:32: ; preds = %pregion_for_entry..i
%mul9.i = shl nsw i32 %conv.i, 7
%add10.i = add i32 %mul9.i, %conv2.i
%add13.i = add i32 %add10.i, %4
%idxprom.i = sext i32 %add13.i to i64
%arrayidx.i = getelementptr inbounds double, double* %3, i64 %idxprom.i
%33 = bitcast double* %arrayidx.i to i64*
%34 = load i64, i64* %33, align 8, !tbaa !22, !alias.scope !24, !noalias !25, !llvm.mem.parallel_loop_access !16
%add20.i = add i32 %add10.i, %2
%idxprom21.i = sext i32 %add20.i to i64
%arrayidx22.i = getelementptr inbounds double, double* %1, i64 %idxprom21.i
%35 = bitcast double* %arrayidx22.i to i64*
store i64 %34, i64* %35, align 8, !tbaa !22, !alias.scope !25, !noalias !24, !llvm.mem.parallel_loop_access !16
br label %.r_exit.i
; Loop latch: increment local id x through memory and repeat while it is
; below 128 (unsigned compare). The back edge carries loop ID !17.
.r_exit.i: ; preds = %32, %pregion_for_entry..i
%36 = load i64, i64* %_local_id_x, !noalias !12
%37 = add i64 %36, 1
store i64 %37, i64* %_local_id_x, !noalias !12
%38 = load i64, i64* %_local_id_x, !noalias !12
%39 = icmp ult i64 %38, 128
br i1 %39, label %pregion_for_entry..i, label %grudge_assign_0.exit, !llvm.loop !17
grudge_assign_0.exit: ; preds = %.r_exit.i
ret void
; Records the use-list order of argument %5; keep this in sync with the
; number and order of uses of %5 above.
; uselistorder directives
uselistorder { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, { 3, 4, 5, 0, 1, 2 }
}
; Work-group entry point that unpacks the five kernel arguments from an array
; of i8* and then runs the same guarded-copy loop as
; @_pocl_kernel_grudge_assign_0 (the ".i.i" value names and the label
; %_pocl_kernel_grudge_assign_0.exit suggest that function was inlined here).
;
; Arguments:
;   %0  i8**  argument array; slots 0..4 are read below
;   %1  pointer to a context struct; fields 3, 4 and 5 are loaded but the
;       loaded values are never used
;   %2  i64   truncated to i32 and scaled by 128 to form the base index
;             NOTE(review): presumably the group id in x - confirm
;   %3, %4 i64  not used in this function
; The entry block is the implicit block %5.
define void @_pocl_kernel_grudge_assign_0_workgroup(i8**, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }*, i64, i64, i64) {
; Private stack slots holding the current local id in each dimension.
%_local_id_x.i = alloca i64
%_local_id_y.i = alloca i64
%_local_id_z.i = alloca i64
; Slot 0: pointer to an i32, dereferenced into %9 (the value used in the guard).
%6 = getelementptr i8*, i8** %0, i32 0
%7 = load i8*, i8** %6
%8 = bitcast i8* %7 to i32*
%9 = load i32, i32* %8
; Slot 1: pointer to a double*, dereferenced into %13 (the store target).
%10 = getelementptr i8*, i8** %0, i32 1
%11 = load i8*, i8** %10
%12 = bitcast i8* %11 to double**
%13 = load double*, double** %12
; Slot 2: pointer to an i32, dereferenced into %17 (offset added to the store index).
%14 = getelementptr i8*, i8** %0, i32 2
%15 = load i8*, i8** %14
%16 = bitcast i8* %15 to i32*
%17 = load i32, i32* %16
; Slot 3: pointer to a double*, dereferenced into %21 (the load source).
%18 = getelementptr i8*, i8** %0, i32 3
%19 = load i8*, i8** %18
%20 = bitcast i8* %19 to double**
%21 = load double*, double** %20
; Slot 4: pointer to an i32, dereferenced into %25 (offset added to the load index).
%22 = getelementptr i8*, i8** %0, i32 4
%23 = load i8*, i8** %22
%24 = bitcast i8* %23 to i32*
%25 = load i32, i32* %24
; Lifetime markers (8 bytes each) for the three local-id slots.
%26 = bitcast i64* %_local_id_x.i to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %26)
%27 = bitcast i64* %_local_id_y.i to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %27)
%28 = bitcast i64* %_local_id_z.i to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %28)
; Loads of context-struct fields 3, 4 and 5, performed twice. None of the
; results (%30, %32, %34, %36, %38, %40) has a later use.
%29 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
%30 = load i8*, i8** %29, !noalias !26
%31 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
%32 = load i32*, i32** %31, !noalias !26
%33 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
%34 = load i32, i32* %33, !noalias !26
%35 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
%36 = load i8*, i8** %35, !noalias !26
%37 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
%38 = load i32*, i32** %37, !noalias !26
%39 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
%40 = load i32, i32* %39, !noalias !26
; Zero all three local ids; x is stored twice.
store i64 0, i64* %_local_id_x.i, !noalias !30
store i64 0, i64* %_local_id_y.i, !noalias !30
store i64 0, i64* %_local_id_z.i, !noalias !30
store i64 0, i64* %_local_id_x.i, !noalias !30
br label %pregion_for_entry..i.i
; Loop header. Memory accesses here carry !llvm.mem.parallel_loop_access !16,
; which refers to the loop ID !17 attached to the back edge.
pregion_for_entry..i.i: ; preds = %.r_exit.i.i, %5
; %41 and %42 (z and y ids) and the second x load %44 have no later use.
%41 = load i64, i64* %_local_id_z.i, !noalias !30, !llvm.mem.parallel_loop_access !16
%42 = load i64, i64* %_local_id_y.i, !noalias !30, !llvm.mem.parallel_loop_access !16
%43 = load i64, i64* %_local_id_x.i, !noalias !30, !llvm.mem.parallel_loop_access !16
%conv.i.i = trunc i64 %2 to i32
%mul.i.i = mul nsw i32 %conv.i.i, -128
%44 = load i64, i64* %_local_id_x.i, align 8, !tbaa !18, !noalias !30, !llvm.mem.parallel_loop_access !16
%conv2.i.i = trunc i64 %43 to i32
; Guard value: (%9 - 1) + (-128 * %conv.i.i) - %conv2.i.i, using wrapping i32
; arithmetic. This is the add/add/sub form from the '-' side of the diff.
%add.i.i = add i32 %9, -1
%add4.i.i = add i32 %add.i.i, %mul.i.i
%add5.i.i = sub i32 %add4.i.i, %conv2.i.i
; Signed test "guard > -1", i.e. guard >= 0.
%cmp.i.i = icmp sgt i32 %add5.i.i, -1
br i1 %cmp.i.i, label %45, label %.r_exit.i.i
; Guarded copy: index = (%conv.i.i << 7) + %conv2.i.i; read %21[index + %25]
; and write it to %13[index + %17], moving the element as a raw i64.
; <label>:45: ; preds = %pregion_for_entry..i.i
%mul9.i.i = shl nsw i32 %conv.i.i, 7
%add10.i.i = add i32 %mul9.i.i, %conv2.i.i
%add13.i.i = add i32 %add10.i.i, %25
%idxprom.i.i = sext i32 %add13.i.i to i64
%arrayidx.i.i = getelementptr inbounds double, double* %21, i64 %idxprom.i.i
%46 = bitcast double* %arrayidx.i.i to i64*
%47 = load i64, i64* %46, align 8, !tbaa !22, !alias.scope !34, !noalias !35, !llvm.mem.parallel_loop_access !16
%add20.i.i = add i32 %add10.i.i, %17
%idxprom21.i.i = sext i32 %add20.i.i to i64
%arrayidx22.i.i = getelementptr inbounds double, double* %13, i64 %idxprom21.i.i
%48 = bitcast double* %arrayidx22.i.i to i64*
store i64 %47, i64* %48, align 8, !tbaa !22, !alias.scope !35, !noalias !34, !llvm.mem.parallel_loop_access !16
br label %.r_exit.i.i
; Loop latch: increment local id x through memory and repeat while it is
; below 128 (unsigned compare).
.r_exit.i.i: ; preds = %45, %pregion_for_entry..i.i
%49 = load i64, i64* %_local_id_x.i, !noalias !30
%50 = add i64 %49, 1
store i64 %50, i64* %_local_id_x.i, !noalias !30
%51 = load i64, i64* %_local_id_x.i, !noalias !30
%52 = icmp ult i64 %51, 128
br i1 %52, label %pregion_for_entry..i.i, label %_pocl_kernel_grudge_assign_0.exit, !llvm.loop !17
; Exit: end the lifetimes of the three local-id slots and return.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i
%53 = bitcast i64* %_local_id_x.i to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %53)
%54 = bitcast i64* %_local_id_y.i to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %54)
%55 = bitcast i64* %_local_id_z.i to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %55)
ret void
; Recorded use-list orders for the three allocas; they depend on the exact
; number and order of uses above.
; uselistorder directives
uselistorder i64* %_local_id_z.i, { 0, 2, 3, 1 }
uselistorder i64* %_local_id_y.i, { 0, 2, 3, 1 }
uselistorder i64* %_local_id_x.i, { 0, 2, 3, 4, 5, 6, 7, 8, 1 }
}
; Variant of @_pocl_kernel_grudge_assign_0_workgroup with the same signature
; and the same guarded-copy loop. The visible difference is in argument
; unpacking: slots 1 and 3 of the argument array are bitcast directly to
; double* instead of being loaded through a double**.
;
; Arguments:
;   %0  i8**  argument array; slots 0..4 are read below
;   %1  pointer to a context struct; fields 3, 4 and 5 are loaded but the
;       loaded values are never used
;   %2  i64   truncated to i32 and scaled by 128 to form the base index
;             NOTE(review): presumably the group id in x - confirm
;   %3, %4 i64  not used in this function
; The entry block is the implicit block %5.
define void @_pocl_kernel_grudge_assign_0_workgroup_fast(i8**, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }*, i64, i64, i64) {
; Private stack slots holding the current local id in each dimension.
%_local_id_x.i = alloca i64
%_local_id_y.i = alloca i64
%_local_id_z.i = alloca i64
; Slot 0: pointer to an i32, dereferenced into %9 (the value used in the guard).
%6 = getelementptr i8*, i8** %0, i32 0
%7 = load i8*, i8** %6
%8 = bitcast i8* %7 to i32*
%9 = load i32, i32* %8
; Slot 1: used directly as the double* store target %12 (no extra load).
%10 = getelementptr i8*, i8** %0, i32 1
%11 = load i8*, i8** %10
%12 = bitcast i8* %11 to double*
; Slot 2: pointer to an i32, dereferenced into %16 (offset added to the store index).
%13 = getelementptr i8*, i8** %0, i32 2
%14 = load i8*, i8** %13
%15 = bitcast i8* %14 to i32*
%16 = load i32, i32* %15
; Slot 3: used directly as the double* load source %19 (no extra load).
%17 = getelementptr i8*, i8** %0, i32 3
%18 = load i8*, i8** %17
%19 = bitcast i8* %18 to double*
; Slot 4: pointer to an i32, dereferenced into %23 (offset added to the load index).
%20 = getelementptr i8*, i8** %0, i32 4
%21 = load i8*, i8** %20
%22 = bitcast i8* %21 to i32*
%23 = load i32, i32* %22
; Lifetime markers (8 bytes each) for the three local-id slots.
%24 = bitcast i64* %_local_id_x.i to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %24)
%25 = bitcast i64* %_local_id_y.i to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %25)
%26 = bitcast i64* %_local_id_z.i to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %26)
; Loads of context-struct fields 3, 4 and 5, performed twice. None of the
; results (%28, %30, %32, %34, %36, %38) has a later use.
%27 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
%28 = load i8*, i8** %27, !noalias !36
%29 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
%30 = load i32*, i32** %29, !noalias !36
%31 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
%32 = load i32, i32* %31, !noalias !36
%33 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
%34 = load i8*, i8** %33, !noalias !36
%35 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
%36 = load i32*, i32** %35, !noalias !36
%37 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
%38 = load i32, i32* %37, !noalias !36
; Zero all three local ids; x is stored twice.
store i64 0, i64* %_local_id_x.i, !noalias !40
store i64 0, i64* %_local_id_y.i, !noalias !40
store i64 0, i64* %_local_id_z.i, !noalias !40
store i64 0, i64* %_local_id_x.i, !noalias !40
br label %pregion_for_entry..i.i
; Loop header. Memory accesses here carry !llvm.mem.parallel_loop_access !16,
; which refers to the loop ID !17 attached to the back edge.
pregion_for_entry..i.i: ; preds = %.r_exit.i.i, %5
; %39 and %40 (z and y ids) and the second x load %42 have no later use.
%39 = load i64, i64* %_local_id_z.i, !noalias !40, !llvm.mem.parallel_loop_access !16
%40 = load i64, i64* %_local_id_y.i, !noalias !40, !llvm.mem.parallel_loop_access !16
%41 = load i64, i64* %_local_id_x.i, !noalias !40, !llvm.mem.parallel_loop_access !16
%conv.i.i = trunc i64 %2 to i32
%mul.i.i = mul nsw i32 %conv.i.i, -128
%42 = load i64, i64* %_local_id_x.i, align 8, !tbaa !18, !noalias !40, !llvm.mem.parallel_loop_access !16
%conv2.i.i = trunc i64 %41 to i32
; Guard value: (%9 - 1) + (-128 * %conv.i.i) - %conv2.i.i, using wrapping i32
; arithmetic. This is the add/add/sub form from the '-' side of the diff.
%add.i.i = add i32 %9, -1
%add4.i.i = add i32 %add.i.i, %mul.i.i
%add5.i.i = sub i32 %add4.i.i, %conv2.i.i
; Signed test "guard > -1", i.e. guard >= 0.
%cmp.i.i = icmp sgt i32 %add5.i.i, -1
br i1 %cmp.i.i, label %43, label %.r_exit.i.i
; Guarded copy: index = (%conv.i.i << 7) + %conv2.i.i; read %19[index + %23]
; and write it to %12[index + %16], moving the element as a raw i64.
; <label>:43: ; preds = %pregion_for_entry..i.i
%mul9.i.i = shl nsw i32 %conv.i.i, 7
%add10.i.i = add i32 %mul9.i.i, %conv2.i.i
%add13.i.i = add i32 %add10.i.i, %23
%idxprom.i.i = sext i32 %add13.i.i to i64
%arrayidx.i.i = getelementptr inbounds double, double* %19, i64 %idxprom.i.i
%44 = bitcast double* %arrayidx.i.i to i64*
%45 = load i64, i64* %44, align 8, !tbaa !22, !alias.scope !44, !noalias !45, !llvm.mem.parallel_loop_access !16
%add20.i.i = add i32 %add10.i.i, %16
%idxprom21.i.i = sext i32 %add20.i.i to i64
%arrayidx22.i.i = getelementptr inbounds double, double* %12, i64 %idxprom21.i.i
%46 = bitcast double* %arrayidx22.i.i to i64*
store i64 %45, i64* %46, align 8, !tbaa !22, !alias.scope !45, !noalias !44, !llvm.mem.parallel_loop_access !16
br label %.r_exit.i.i
; Loop latch: increment local id x through memory and repeat while it is
; below 128 (unsigned compare).
.r_exit.i.i: ; preds = %43, %pregion_for_entry..i.i
%47 = load i64, i64* %_local_id_x.i, !noalias !40
%48 = add i64 %47, 1
store i64 %48, i64* %_local_id_x.i, !noalias !40
%49 = load i64, i64* %_local_id_x.i, !noalias !40
%50 = icmp ult i64 %49, 128
br i1 %50, label %pregion_for_entry..i.i, label %_pocl_kernel_grudge_assign_0.exit, !llvm.loop !17
; Exit: end the lifetimes of the three local-id slots and return.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i
%51 = bitcast i64* %_local_id_x.i to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %51)
%52 = bitcast i64* %_local_id_y.i to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %52)
%53 = bitcast i64* %_local_id_z.i to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %53)
ret void
; Recorded use-list orders for the three allocas and for several integer
; constants. They depend on the exact number and order of uses, so any edit
; to the instructions of this module can invalidate them.
; uselistorder directives
uselistorder i64* %_local_id_z.i, { 0, 2, 3, 1 }
uselistorder i64* %_local_id_y.i, { 0, 2, 3, 1 }
uselistorder i64* %_local_id_x.i, { 0, 2, 3, 4, 5, 6, 7, 8, 1 }
uselistorder i64 0, { 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 9, 10 }
uselistorder i32 5, { 0, 1, 2, 3, 5, 4 }
uselistorder i64 8, { 0, 2, 4, 1, 3, 5, 6, 8, 10, 7, 9, 11 }
uselistorder i32 4, { 0, 1, 4, 2, 3, 5, 7, 6 }
uselistorder i32 3, { 0, 1, 4, 2, 3, 5, 7, 6 }
uselistorder i32 0, { 0, 1, 2, 3, 4, 5, 12, 6, 7, 8, 9, 10, 11, 13, 17, 18, 19, 14, 15, 16 }
uselistorder i32 1, { 6, 0, 1, 2, 7, 3, 4, 5, 8, 9, 10 }
}
; Lifetime intrinsics called by the two *_workgroup* functions around their
; local-id allocas.
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2
; Attribute groups: #0 is used by @pocl.barrier, #1 by
; @_pocl_kernel_grudge_assign_0 (alwaysinline, target-cpu haswell), and #2 by
; the lifetime intrinsics.
attributes #0 = { noduplicate }
attributes #1 = { alwaysinline norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { argmemonly nounwind }
; Named module-level metadata.
!llvm.module.flags = !{!0, !1, !2}
!opencl.ocl.version = !{!3}
!llvm.ident = !{!4}
!opencl.spir.version = !{!3}
; !0-!2: module flags. !3: version pair {1, 2} shared by the OpenCL and SPIR
; version nodes. !4: producer string.
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 1, i32 2}
!4 = !{!"clang version 6.0.1 (tags/RELEASE_601/final)"}
; !5-!11: kernel metadata attached to @_pocl_kernel_grudge_assign_0:
; address spaces, access qualifiers, types (also used as base types), type
; qualifiers, argument names, required work-group size (128, 1, 1) and the
; pocl_generated node.
!5 = !{i32 0, i32 1, i32 0, i32 1, i32 0}
!6 = !{!"none", !"none", !"none", !"none", !"none"}
!7 = !{!"int", !"double*", !"int", !"double*", !"int"}
!8 = !{!"", !"restrict", !"", !"restrict const", !""}
!9 = !{!"grdg_n", !"expr_8", !"expr_8_offset", !"grdg_sub_discr_dx0_dr0", !"grdg_sub_discr_dx0_dr0_offset"}
!10 = !{i32 128, i32 1, i32 1}
!11 = !{i32 1}
; !12-!15: alias-scope domain (!14), one scope per buffer argument (!13, !15)
; and the combined list (!12) used as !noalias in
; @_pocl_kernel_grudge_assign_0.
!12 = !{!13, !15}
!13 = distinct !{!13, !14, !"grudge_assign_0: %expr_8"}
!14 = distinct !{!14, !"grudge_assign_0"}
!15 = distinct !{!15, !14, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
; !17 is the loop ID attached through !llvm.loop to the back edge of every
; work-item loop in this module; !16 wraps it for
; !llvm.mem.parallel_loop_access.
!16 = !{!17}
!17 = distinct !{!17}
; !18-!23: TBAA access tags and type nodes for "long" and "double".
!18 = !{!19, !19, i64 0}
!19 = !{!"long", !20, i64 0}
!20 = !{!"omnipotent char", !21, i64 0}
!21 = !{!"Simple C/C++ TBAA"}
!22 = !{!23, !23, i64 0}
!23 = !{!"double", !20, i64 0}
; !24/!25: single-scope lists used as !alias.scope / !noalias on the load and
; store in @_pocl_kernel_grudge_assign_0.
!24 = !{!15}
!25 = !{!13}
; !26-!35: the equivalent scope nodes used by
; @_pocl_kernel_grudge_assign_0_workgroup.
!26 = !{!27, !29}
!27 = distinct !{!27, !28, !"_pocl_kernel_grudge_assign_0: argument 0"}
!28 = distinct !{!28, !"_pocl_kernel_grudge_assign_0"}
!29 = distinct !{!29, !28, !"_pocl_kernel_grudge_assign_0: argument 1"}
!30 = !{!31, !33, !27, !29}
!31 = distinct !{!31, !32, !"grudge_assign_0: %expr_8"}
!32 = distinct !{!32, !"grudge_assign_0"}
!33 = distinct !{!33, !32, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
!34 = !{!33, !29}
!35 = !{!31, !27}
; !36-!45: the equivalent scope nodes used by
; @_pocl_kernel_grudge_assign_0_workgroup_fast.
!36 = !{!37, !39}
!37 = distinct !{!37, !38, !"_pocl_kernel_grudge_assign_0: argument 0"}
!38 = distinct !{!38, !"_pocl_kernel_grudge_assign_0"}
!39 = distinct !{!39, !38, !"_pocl_kernel_grudge_assign_0: argument 1"}
!40 = !{!41, !43, !37, !39}
!41 = distinct !{!41, !42, !"grudge_assign_0: %expr_8"}
!42 = distinct !{!42, !"grudge_assign_0"}
!43 = distinct !{!43, !42, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
!44 = !{!43, !39}
!45 = !{!41, !37}
; Module header for the second IR module in this paste. Its guard computation
; (xor / add / add) matches the '+' side of the diff, i.e. in2.ll.
; ModuleID = 'parallel_bc'
source_filename = "parallel_bc"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; External i64 globals for the group and local ids. The functions visible in
; this module do not reference them; they use private allocas named
; %_local_id_* instead.
@_group_id_x = external local_unnamed_addr global i64, align 8
@_group_id_y = external local_unnamed_addr global i64, align 8
@_group_id_z = external local_unnamed_addr global i64, align 8
@_local_id_x = external local_unnamed_addr global i64, align 8
@_local_id_y = external local_unnamed_addr global i64, align 8
@_local_id_z = external local_unnamed_addr global i64, align 8
; Function Attrs: noduplicate
; Empty stub: the body only returns. It references attribute group #0.
; No call to it is visible in this module.
; NOTE(review): presumably a marker that pocl's kernel compiler searches for;
; confirm against the pocl sources.
define linkonce void @pocl.barrier() #0 {
ret void
}
; Function Attrs: alwaysinline norecurse nounwind
; Kernel body wrapped in a loop over 128 values of a private local-id-x
; counter. On each iteration it copies one 64-bit element from buffer %3 to
; buffer %1 when the guard computed in the loop header is non-negative.
; This copy of the function uses the xor/add/add form of the guard.
;
; Arguments:
;   %0  i32      bound used in the guard
;   %1  double*  noalias; the store target
;   %2  i32      offset added to the store index
;   %3  double*  noalias, readonly; the load source
;   %4  i32      offset added to the load index
;   %5  pointer to a context struct; fields 3, 4 and 5 are loaded below but
;       the loaded values are never used in this function
;   %6  i64      truncated to i32 and scaled by 128 to form the base index
;                NOTE(review): presumably the group id in x - confirm
;   %7, %8 i64   not used in this function
; The entry block is the implicit block %9.
define void @_pocl_kernel_grudge_assign_0(i32, double* noalias nocapture, i32, double* noalias nocapture readonly, i32, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }*, i64, i64, i64) #1 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 !reqd_work_group_size !10 !pocl_generated !11 {
; Private stack slots holding the current local id in each dimension.
%_local_id_x = alloca i64
%_local_id_y = alloca i64
%_local_id_z = alloca i64
; Loads of context-struct fields 3, 4 and 5, performed twice. None of the
; results (%12, %15, %18, %21, %24, %27) has a later use.
%10 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 3
%11 = getelementptr i8*, i8** %10, i64 0
%12 = load i8*, i8** %11
%13 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 4
%14 = getelementptr i32*, i32** %13, i64 0
%15 = load i32*, i32** %14
%16 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 5
%17 = getelementptr i32, i32* %16, i64 0
%18 = load i32, i32* %17
%19 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 3
%20 = getelementptr i8*, i8** %19, i64 0
%21 = load i8*, i8** %20
%22 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 4
%23 = getelementptr i32*, i32** %22, i64 0
%24 = load i32*, i32** %23
%25 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i32 0, i32 5
%26 = getelementptr i32, i32* %25, i64 0
%27 = load i32, i32* %26
; Zero all three local ids; x is stored twice.
store i64 0, i64* %_local_id_x, !noalias !12
store i64 0, i64* %_local_id_y, !noalias !12
store i64 0, i64* %_local_id_z, !noalias !12
store i64 0, i64* %_local_id_x, !noalias !12
br label %pregion_for_entry..i
; Loop header. Memory accesses here carry !llvm.mem.parallel_loop_access !16.
pregion_for_entry..i: ; preds = %.r_exit.i, %9
; %28 and %29 (z and y ids) and the second x load %31 have no later use.
%28 = load i64, i64* %_local_id_z, !noalias !12, !llvm.mem.parallel_loop_access !16
%29 = load i64, i64* %_local_id_y, !noalias !12, !llvm.mem.parallel_loop_access !16
%30 = load i64, i64* %_local_id_x, !noalias !12, !llvm.mem.parallel_loop_access !16
%conv.i = trunc i64 %6 to i32
%mul.i = mul nsw i32 %conv.i, -128
%31 = load i64, i64* %_local_id_x, align 8, !tbaa !18, !noalias !12, !llvm.mem.parallel_loop_access !16
%conv2.i = trunc i64 %30 to i32
; Guard value in xor/add/add form: (~%conv2.i) + %0 + (-128 * %conv.i).
; Since ~x equals -x - 1 in two's complement and these instructions wrap (no
; nsw/nuw), this is the same i32 value as (%0 - 1) + %mul.i - %conv2.i, the
; add/add/sub form on the '-' side of the diff. These three lines are the
; only part that differs between the two inputs.
%add.i = xor i32 %conv2.i, -1
%add4.i = add i32 %add.i, %0
%add5.i = add i32 %add4.i, %mul.i
; Signed test "guard > -1", i.e. guard >= 0. Ignoring i32 wrap-around this is
; 128 * %conv.i + %conv2.i < %0.
%cmp.i = icmp sgt i32 %add5.i, -1
br i1 %cmp.i, label %32, label %.r_exit.i
; Guarded copy: index = (%conv.i << 7) + %conv2.i; read %3[index + %4] and
; write it to %1[index + %2]. The element is moved as a raw i64 through
; bitcast pointers rather than as a double.
; <label>:32: ; preds = %pregion_for_entry..i
%mul9.i = shl nsw i32 %conv.i, 7
%add10.i = add i32 %mul9.i, %conv2.i
%add13.i = add i32 %add10.i, %4
%idxprom.i = sext i32 %add13.i to i64
%arrayidx.i = getelementptr inbounds double, double* %3, i64 %idxprom.i
%33 = bitcast double* %arrayidx.i to i64*
%34 = load i64, i64* %33, align 8, !tbaa !22, !alias.scope !24, !noalias !25, !llvm.mem.parallel_loop_access !16
%add20.i = add i32 %add10.i, %2
%idxprom21.i = sext i32 %add20.i to i64
%arrayidx22.i = getelementptr inbounds double, double* %1, i64 %idxprom21.i
%35 = bitcast double* %arrayidx22.i to i64*
store i64 %34, i64* %35, align 8, !tbaa !22, !alias.scope !25, !noalias !24, !llvm.mem.parallel_loop_access !16
br label %.r_exit.i
; Loop latch: increment local id x through memory and repeat while it is
; below 128 (unsigned compare). The back edge carries !llvm.loop !17.
.r_exit.i: ; preds = %32, %pregion_for_entry..i
%36 = load i64, i64* %_local_id_x, !noalias !12
%37 = add i64 %36, 1
store i64 %37, i64* %_local_id_x, !noalias !12
%38 = load i64, i64* %_local_id_x, !noalias !12
%39 = icmp ult i64 %38, 128
br i1 %39, label %pregion_for_entry..i, label %grudge_assign_0.exit, !llvm.loop !17
grudge_assign_0.exit: ; preds = %.r_exit.i
ret void
; Records the use-list order of argument %5; keep this in sync with the
; number and order of uses of %5 above.
; uselistorder directives
uselistorder { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, { 3, 4, 5, 0, 1, 2 }
}
; Work-group entry point that unpacks the five kernel arguments from an array
; of i8* and then runs the same guarded-copy loop as
; @_pocl_kernel_grudge_assign_0 (the ".i.i" value names and the label
; %_pocl_kernel_grudge_assign_0.exit suggest that function was inlined here).
; This copy uses the xor/add/add form of the guard.
;
; Arguments:
;   %0  i8**  argument array; slots 0..4 are read below
;   %1  pointer to a context struct; fields 3, 4 and 5 are loaded but the
;       loaded values are never used
;   %2  i64   truncated to i32 and scaled by 128 to form the base index
;             NOTE(review): presumably the group id in x - confirm
;   %3, %4 i64  not used in this function
; The entry block is the implicit block %5.
define void @_pocl_kernel_grudge_assign_0_workgroup(i8**, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }*, i64, i64, i64) {
; Private stack slots holding the current local id in each dimension.
%_local_id_x.i = alloca i64
%_local_id_y.i = alloca i64
%_local_id_z.i = alloca i64
; Slot 0: pointer to an i32, dereferenced into %9 (the value used in the guard).
%6 = getelementptr i8*, i8** %0, i32 0
%7 = load i8*, i8** %6
%8 = bitcast i8* %7 to i32*
%9 = load i32, i32* %8
; Slot 1: pointer to a double*, dereferenced into %13 (the store target).
%10 = getelementptr i8*, i8** %0, i32 1
%11 = load i8*, i8** %10
%12 = bitcast i8* %11 to double**
%13 = load double*, double** %12
; Slot 2: pointer to an i32, dereferenced into %17 (offset added to the store index).
%14 = getelementptr i8*, i8** %0, i32 2
%15 = load i8*, i8** %14
%16 = bitcast i8* %15 to i32*
%17 = load i32, i32* %16
; Slot 3: pointer to a double*, dereferenced into %21 (the load source).
%18 = getelementptr i8*, i8** %0, i32 3
%19 = load i8*, i8** %18
%20 = bitcast i8* %19 to double**
%21 = load double*, double** %20
; Slot 4: pointer to an i32, dereferenced into %25 (offset added to the load index).
%22 = getelementptr i8*, i8** %0, i32 4
%23 = load i8*, i8** %22
%24 = bitcast i8* %23 to i32*
%25 = load i32, i32* %24
; Lifetime markers (8 bytes each) for the three local-id slots.
%26 = bitcast i64* %_local_id_x.i to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %26)
%27 = bitcast i64* %_local_id_y.i to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %27)
%28 = bitcast i64* %_local_id_z.i to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %28)
; Loads of context-struct fields 3, 4 and 5, performed twice. None of the
; results (%30, %32, %34, %36, %38, %40) has a later use.
%29 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
%30 = load i8*, i8** %29, !noalias !26
%31 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
%32 = load i32*, i32** %31, !noalias !26
%33 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
%34 = load i32, i32* %33, !noalias !26
%35 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
%36 = load i8*, i8** %35, !noalias !26
%37 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
%38 = load i32*, i32** %37, !noalias !26
%39 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
%40 = load i32, i32* %39, !noalias !26
; Zero all three local ids; x is stored twice.
store i64 0, i64* %_local_id_x.i, !noalias !30
store i64 0, i64* %_local_id_y.i, !noalias !30
store i64 0, i64* %_local_id_z.i, !noalias !30
store i64 0, i64* %_local_id_x.i, !noalias !30
br label %pregion_for_entry..i.i
; Loop header. Memory accesses here carry !llvm.mem.parallel_loop_access !16.
pregion_for_entry..i.i: ; preds = %.r_exit.i.i, %5
; %41 and %42 (z and y ids) and the second x load %44 have no later use.
%41 = load i64, i64* %_local_id_z.i, !noalias !30, !llvm.mem.parallel_loop_access !16
%42 = load i64, i64* %_local_id_y.i, !noalias !30, !llvm.mem.parallel_loop_access !16
%43 = load i64, i64* %_local_id_x.i, !noalias !30, !llvm.mem.parallel_loop_access !16
%conv.i.i = trunc i64 %2 to i32
%mul.i.i = mul nsw i32 %conv.i.i, -128
%44 = load i64, i64* %_local_id_x.i, align 8, !tbaa !18, !noalias !30, !llvm.mem.parallel_loop_access !16
%conv2.i.i = trunc i64 %43 to i32
; Guard value in xor/add/add form: (~%conv2.i.i) + %9 + (-128 * %conv.i.i).
; Since ~x equals -x - 1 and these instructions wrap, this is the same i32
; value as (%9 - 1) + %mul.i.i - %conv2.i.i, the add/add/sub form on the '-'
; side of the diff.
%add.i.i = xor i32 %conv2.i.i, -1
%add4.i.i = add i32 %add.i.i, %9
%add5.i.i = add i32 %add4.i.i, %mul.i.i
; Signed test "guard > -1", i.e. guard >= 0.
%cmp.i.i = icmp sgt i32 %add5.i.i, -1
br i1 %cmp.i.i, label %45, label %.r_exit.i.i
; Guarded copy: index = (%conv.i.i << 7) + %conv2.i.i; read %21[index + %25]
; and write it to %13[index + %17], moving the element as a raw i64.
; <label>:45: ; preds = %pregion_for_entry..i.i
%mul9.i.i = shl nsw i32 %conv.i.i, 7
%add10.i.i = add i32 %mul9.i.i, %conv2.i.i
%add13.i.i = add i32 %add10.i.i, %25
%idxprom.i.i = sext i32 %add13.i.i to i64
%arrayidx.i.i = getelementptr inbounds double, double* %21, i64 %idxprom.i.i
%46 = bitcast double* %arrayidx.i.i to i64*
%47 = load i64, i64* %46, align 8, !tbaa !22, !alias.scope !34, !noalias !35, !llvm.mem.parallel_loop_access !16
%add20.i.i = add i32 %add10.i.i, %17
%idxprom21.i.i = sext i32 %add20.i.i to i64
%arrayidx22.i.i = getelementptr inbounds double, double* %13, i64 %idxprom21.i.i
%48 = bitcast double* %arrayidx22.i.i to i64*
store i64 %47, i64* %48, align 8, !tbaa !22, !alias.scope !35, !noalias !34, !llvm.mem.parallel_loop_access !16
br label %.r_exit.i.i
; Loop latch: increment local id x through memory and repeat while it is
; below 128 (unsigned compare).
.r_exit.i.i: ; preds = %45, %pregion_for_entry..i.i
%49 = load i64, i64* %_local_id_x.i, !noalias !30
%50 = add i64 %49, 1
store i64 %50, i64* %_local_id_x.i, !noalias !30
%51 = load i64, i64* %_local_id_x.i, !noalias !30
%52 = icmp ult i64 %51, 128
br i1 %52, label %pregion_for_entry..i.i, label %_pocl_kernel_grudge_assign_0.exit, !llvm.loop !17
; Exit: end the lifetimes of the three local-id slots and return.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i
%53 = bitcast i64* %_local_id_x.i to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %53)
%54 = bitcast i64* %_local_id_y.i to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %54)
%55 = bitcast i64* %_local_id_z.i to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %55)
ret void
; Recorded use-list orders for the three allocas; they depend on the exact
; number and order of uses above.
; uselistorder directives
uselistorder i64* %_local_id_z.i, { 0, 2, 3, 1 }
uselistorder i64* %_local_id_y.i, { 0, 2, 3, 1 }
uselistorder i64* %_local_id_x.i, { 0, 2, 3, 4, 5, 6, 7, 8, 1 }
}
define void @_pocl_kernel_grudge_assign_0_workgroup_fast(i8**, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }*, i64, i64, i64) {
%_local_id_x.i = alloca i64
%_local_id_y.i = alloca i64
%_local_id_z.i = alloca i64
%6 = getelementptr i8*, i8** %0, i32 0
%7 = load i8*, i8** %6
%8 = bitcast i8* %7 to i32*
%9 = load i32, i32* %8
%10 = getelementptr i8*, i8** %0, i32 1
%11 = load i8*, i8** %10
%12 = bitcast i8* %11 to double*
%13 = getelementptr i8*, i8** %0, i32 2
%14 = load i8*, i8** %13
%15 = bitcast i8* %14 to i32*
%16 = load i32, i32* %15
%17 = getelementptr i8*, i8** %0, i32 3
%18 = load i8*, i8** %17
%19 = bitcast i8* %18 to double*
%20 = getelementptr i8*, i8** %0, i32 4
%21 = load i8*, i8** %20
%22 = bitcast i8* %21 to i32*
%23 = load i32, i32* %22
%24 = bitcast i64* %_local_id_x.i to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %24)
%25 = bitcast i64* %_local_id_y.i to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %25)
%26 = bitcast i64* %_local_id_z.i to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* %26)
%27 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
%28 = load i8*, i8** %27, !noalias !36
%29 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
%30 = load i32*, i32** %29, !noalias !36
%31 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
%32 = load i32, i32* %31, !noalias !36
%33 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 3
%34 = load i8*, i8** %33, !noalias !36
%35 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 4
%36 = load i32*, i32** %35, !noalias !36
%37 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %1, i32 0, i32 5
%38 = load i32, i32* %37, !noalias !36
store i64 0, i64* %_local_id_x.i, !noalias !40
store i64 0, i64* %_local_id_y.i, !noalias !40
store i64 0, i64* %_local_id_z.i, !noalias !40
store i64 0, i64* %_local_id_x.i, !noalias !40
br label %pregion_for_entry..i.i
pregion_for_entry..i.i: ; preds = %.r_exit.i.i, %5
%39 = load i64, i64* %_local_id_z.i, !noalias !40, !llvm.mem.parallel_loop_access !16
%40 = load i64, i64* %_local_id_y.i, !noalias !40, !llvm.mem.parallel_loop_access !16
%41 = load i64, i64* %_local_id_x.i, !noalias !40, !llvm.mem.parallel_loop_access !16
%conv.i.i = trunc i64 %2 to i32
%mul.i.i = mul nsw i32 %conv.i.i, -128
%42 = load i64, i64* %_local_id_x.i, align 8, !tbaa !18, !noalias !40, !llvm.mem.parallel_loop_access !16
%conv2.i.i = trunc i64 %41 to i32
%add.i.i = xor i32 %conv2.i.i, -1
%add4.i.i = add i32 %add.i.i, %9
%add5.i.i = add i32 %add4.i.i, %mul.i.i
%cmp.i.i = icmp sgt i32 %add5.i.i, -1
br i1 %cmp.i.i, label %43, label %.r_exit.i.i
; <label>:43: ; preds = %pregion_for_entry..i.i
%mul9.i.i = shl nsw i32 %conv.i.i, 7
%add10.i.i = add i32 %mul9.i.i, %conv2.i.i
%add13.i.i = add i32 %add10.i.i, %23
%idxprom.i.i = sext i32 %add13.i.i to i64
%arrayidx.i.i = getelementptr inbounds double, double* %19, i64 %idxprom.i.i
%44 = bitcast double* %arrayidx.i.i to i64*
%45 = load i64, i64* %44, align 8, !tbaa !22, !alias.scope !44, !noalias !45, !llvm.mem.parallel_loop_access !16
%add20.i.i = add i32 %add10.i.i, %16
%idxprom21.i.i = sext i32 %add20.i.i to i64
%arrayidx22.i.i = getelementptr inbounds double, double* %12, i64 %idxprom21.i.i
%46 = bitcast double* %arrayidx22.i.i to i64*
store i64 %45, i64* %46, align 8, !tbaa !22, !alias.scope !45, !noalias !44, !llvm.mem.parallel_loop_access !16
br label %.r_exit.i.i
.r_exit.i.i: ; preds = %43, %pregion_for_entry..i.i
%47 = load i64, i64* %_local_id_x.i, !noalias !40
%48 = add i64 %47, 1
store i64 %48, i64* %_local_id_x.i, !noalias !40
%49 = load i64, i64* %_local_id_x.i, !noalias !40
%50 = icmp ult i64 %49, 128
br i1 %50, label %pregion_for_entry..i.i, label %_pocl_kernel_grudge_assign_0.exit, !llvm.loop !17
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i
%51 = bitcast i64* %_local_id_x.i to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %51)
%52 = bitcast i64* %_local_id_y.i to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %52)
%53 = bitcast i64* %_local_id_z.i to i8*
call void @llvm.lifetime.end.p0i8(i64 8, i8* %53)
ret void
; uselistorder directives
uselistorder i64* %_local_id_z.i, { 0, 2, 3, 1 }
uselistorder i64* %_local_id_y.i, { 0, 2, 3, 1 }
uselistorder i64* %_local_id_x.i, { 0, 2, 3, 4, 5, 6, 7, 8, 1 }
uselistorder i64 0, { 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 9, 10 }
uselistorder i32 5, { 0, 1, 2, 3, 5, 4 }
uselistorder i64 8, { 0, 2, 4, 1, 3, 5, 6, 8, 10, 7, 9, 11 }
uselistorder i32 4, { 0, 1, 4, 2, 3, 5, 7, 6 }
uselistorder i32 3, { 0, 1, 4, 2, 3, 5, 7, 6 }
uselistorder i32 0, { 0, 1, 2, 3, 4, 5, 12, 6, 7, 8, 9, 10, 11, 13, 17, 18, 19, 14, 15, 16 }
uselistorder i32 1, { 6, 0, 1, 2, 7, 3, 4, 5, 8, 9, 10 }
}
; Lifetime-marker intrinsics. The function body that ends just above calls
; them with an i64 size of 8 on i8* bitcasts of %_local_id_x.i, %_local_id_y.i
; and %_local_id_z.i (start before the loop, end in the exit block).
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2
; Attribute groups, referenced as #N from definitions and declarations.
; #0 carries only `noduplicate`; its user is not visible in this part of the
; listing.
attributes #0 = { noduplicate }
; #1: alwaysinline/norecurse/nounwind plus codegen string attributes: target
; CPU "haswell" with an explicit feature list (includes +avx2 and +fma), and
; every listed *-fp-math relaxation set to "false".
; NOTE(review): presumably attached to the kernel function(s) of this module;
; the `define` lines are outside this chunk -- confirm.
attributes #1 = { alwaysinline norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #2: used by both lifetime intrinsic declarations above.
attributes #2 = { argmemonly nounwind }
; ---- Named (module-level) metadata ----------------------------------------
; Module flags are the three tuples !0..!2 below.
!llvm.module.flags = !{!0, !1, !2}
; Both OpenCL version entries point at the same node !3 = {1, 2}.
!opencl.ocl.version = !{!3}
!llvm.ident = !{!4}
!opencl.spir.version = !{!3}
; Module flag tuples: wchar_size = 4, PIC Level = 2, PIE Level = 2.
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
; Version pair shared by !opencl.ocl.version and !opencl.spir.version
; (presumably major.minor, i.e. 1.2 -- confirm).
!3 = !{i32 1, i32 2}
; Producer string referenced by !llvm.ident.
!4 = !{!"clang version 6.0.1 (tags/RELEASE_601/final)"}
; ---- Five-element tuples, one entry per kernel argument --------------------
; NOTE(review): presumably the !kernel_arg_addr_space / access_qual / type /
; type_qual / name operands of the kernel definition, which is outside this
; chunk -- confirm. !9 holds the names grdg_n, expr_8, expr_8_offset,
; grdg_sub_discr_dx0_dr0 and grdg_sub_discr_dx0_dr0_offset.
!5 = !{i32 0, i32 1, i32 0, i32 1, i32 0}
!6 = !{!"none", !"none", !"none", !"none", !"none"}
!7 = !{!"int", !"double*", !"int", !"double*", !"int"}
!8 = !{!"", !"restrict", !"", !"restrict const", !""}
!9 = !{!"grdg_n", !"expr_8", !"expr_8_offset", !"grdg_sub_discr_dx0_dr0", !"grdg_sub_discr_dx0_dr0_offset"}
; NOTE(review): presumably the required work-group size (the work-item loop
; above compares its counter against 128) and a pocl marker -- the attaching
; lines are not visible here; confirm.
!10 = !{i32 128, i32 1, i32 1}
!11 = !{i32 1}
; ---- Alias scopes, first set -----------------------------------------------
; !14 is the domain "grudge_assign_0"; !13 and !15 are scopes inside it for
; %expr_8 and %grdg_sub_discr_dx0_dr0. !12 lists both scopes.
!12 = !{!13, !15}
!13 = distinct !{!13, !14, !"grudge_assign_0: %expr_8"}
!14 = distinct !{!14, !"grudge_assign_0"}
!15 = distinct !{!15, !14, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
; ---- Loop metadata ---------------------------------------------------------
; !17 is the loop id on the back-edge branch (!llvm.loop !17); !16 wraps it
; and is what the !llvm.mem.parallel_loop_access annotations reference.
!16 = !{!17}
!17 = distinct !{!17}
; ---- TBAA ------------------------------------------------------------------
; Root !21 "Simple C/C++ TBAA" -> !20 "omnipotent char" -> type nodes !19
; "long" and !23 "double". !18 and !22 are the access tags: !18 is on the
; aligned load of %_local_id_x.i, !22 on the load/store of array elements.
!18 = !{!19, !19, i64 0}
!19 = !{!"long", !20, i64 0}
!20 = !{!"omnipotent char", !21, i64 0}
!21 = !{!"Simple C/C++ TBAA"}
!22 = !{!23, !23, i64 0}
!23 = !{!"double", !20, i64 0}
; Single-scope lists over the first alias-scope set.
!24 = !{!15}
!25 = !{!13}
; ---- Alias scopes, second set ----------------------------------------------
; Domain !28 "_pocl_kernel_grudge_assign_0" with argument scopes !27/!29, and
; a distinct domain !32 "grudge_assign_0" with scopes !31/!33.
; Users of !26..!35 are outside this chunk.
!26 = !{!27, !29}
!27 = distinct !{!27, !28, !"_pocl_kernel_grudge_assign_0: argument 0"}
!28 = distinct !{!28, !"_pocl_kernel_grudge_assign_0"}
!29 = distinct !{!29, !28, !"_pocl_kernel_grudge_assign_0: argument 1"}
!30 = !{!31, !33, !27, !29}
!31 = distinct !{!31, !32, !"grudge_assign_0: %expr_8"}
!32 = distinct !{!32, !"grudge_assign_0"}
!33 = distinct !{!33, !32, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
!34 = !{!33, !29}
!35 = !{!31, !27}
; ---- Alias scopes, third set -----------------------------------------------
; Same shape as the second set, with fresh distinct nodes (domains !38, !42).
; The function body ending just above this section uses them: !36 as !noalias
; on the loads through %1, !40 as !noalias on the %_local_id_* accesses, and
; !44/!45 as the swapped !alias.scope/!noalias pair on the element load and
; store.
!36 = !{!37, !39}
!37 = distinct !{!37, !38, !"_pocl_kernel_grudge_assign_0: argument 0"}
!38 = distinct !{!38, !"_pocl_kernel_grudge_assign_0"}
!39 = distinct !{!39, !38, !"_pocl_kernel_grudge_assign_0: argument 1"}
!40 = !{!41, !43, !37, !39}
!41 = distinct !{!41, !42, !"grudge_assign_0: %expr_8"}
!42 = distinct !{!42, !"grudge_assign_0"}
!43 = distinct !{!43, !42, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
!44 = !{!43, !39}
!45 = !{!41, !37}
--- out1.ll 2019-09-18 02:06:23.236601478 -0500
+++ out2.ll 2019-09-18 02:06:27.156668512 -0500
@@ -1,4 +1,4 @@
-; ModuleID = 'in1.ll'
+; ModuleID = 'in2.ll'
source_filename = "parallel_bc"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -8,8 +8,7 @@
vector.scevcheck:
%conv.i = trunc i64 %6 to i32
%mul.i = mul nsw i32 %conv.i, -128
- %add.i = add i32 %0, -1
- %add4.i = add i32 %add.i, %mul.i
+ %add4.i = add i32 %mul.i, %0
%mul9.i = shl nsw i32 %conv.i, 7
%9 = shl i32 %conv.i, 7
%10 = add i32 %9, %4
@@ -28,10 +27,10 @@
%broadcast.splat19 = shufflevector <4 x i32> %broadcast.splatinsert18, <4 x i32> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert20 = insertelement <4 x i32> undef, i32 %add4.i, i32 0
%broadcast.splat21 = shufflevector <4 x i32> %broadcast.splatinsert20, <4 x i32> undef, <4 x i32> zeroinitializer
- %15 = add <4 x i32> %broadcast.splat, <i32 0, i32 -1, i32 -2, i32 -3>
- %16 = add <4 x i32> %broadcast.splat17, <i32 -4, i32 -5, i32 -6, i32 -7>
- %17 = add <4 x i32> %broadcast.splat19, <i32 -8, i32 -9, i32 -10, i32 -11>
- %18 = add <4 x i32> %broadcast.splat21, <i32 -12, i32 -13, i32 -14, i32 -15>
+ %15 = add <4 x i32> %broadcast.splat, <i32 -1, i32 -2, i32 -3, i32 -4>
+ %16 = add <4 x i32> %broadcast.splat17, <i32 -5, i32 -6, i32 -7, i32 -8>
+ %17 = add <4 x i32> %broadcast.splat19, <i32 -9, i32 -10, i32 -11, i32 -12>
+ %18 = add <4 x i32> %broadcast.splat21, <i32 -13, i32 -14, i32 -15, i32 -16>
%19 = icmp sgt <4 x i32> %15, <i32 -1, i32 -1, i32 -1, i32 -1>
%20 = icmp sgt <4 x i32> %16, <i32 -1, i32 -1, i32 -1, i32 -1>
%21 = icmp sgt <4 x i32> %17, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -64,10 +63,10 @@
%41 = getelementptr inbounds double, double* %35, i64 12
%42 = bitcast double* %41 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24, <4 x i64>* %42, i32 8, <4 x i1> %22), !tbaa !12, !alias.scope !19, !noalias !16
- %43 = add <4 x i32> %broadcast.splat, <i32 -16, i32 -17, i32 -18, i32 -19>
- %44 = add <4 x i32> %broadcast.splat17, <i32 -20, i32 -21, i32 -22, i32 -23>
- %45 = add <4 x i32> %broadcast.splat19, <i32 -24, i32 -25, i32 -26, i32 -27>
- %46 = add <4 x i32> %broadcast.splat21, <i32 -28, i32 -29, i32 -30, i32 -31>
+ %43 = add <4 x i32> %broadcast.splat, <i32 -17, i32 -18, i32 -19, i32 -20>
+ %44 = add <4 x i32> %broadcast.splat17, <i32 -21, i32 -22, i32 -23, i32 -24>
+ %45 = add <4 x i32> %broadcast.splat19, <i32 -25, i32 -26, i32 -27, i32 -28>
+ %46 = add <4 x i32> %broadcast.splat21, <i32 -29, i32 -30, i32 -31, i32 -32>
%47 = icmp sgt <4 x i32> %43, <i32 -1, i32 -1, i32 -1, i32 -1>
%48 = icmp sgt <4 x i32> %44, <i32 -1, i32 -1, i32 -1, i32 -1>
%49 = icmp sgt <4 x i32> %45, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -101,10 +100,10 @@
%70 = getelementptr inbounds double, double* %64, i64 12
%71 = bitcast double* %70 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.1, <4 x i64>* %71, i32 8, <4 x i1> %50), !tbaa !12, !alias.scope !19, !noalias !16
- %72 = add <4 x i32> %broadcast.splat, <i32 -32, i32 -33, i32 -34, i32 -35>
- %73 = add <4 x i32> %broadcast.splat17, <i32 -36, i32 -37, i32 -38, i32 -39>
- %74 = add <4 x i32> %broadcast.splat19, <i32 -40, i32 -41, i32 -42, i32 -43>
- %75 = add <4 x i32> %broadcast.splat21, <i32 -44, i32 -45, i32 -46, i32 -47>
+ %72 = add <4 x i32> %broadcast.splat, <i32 -33, i32 -34, i32 -35, i32 -36>
+ %73 = add <4 x i32> %broadcast.splat17, <i32 -37, i32 -38, i32 -39, i32 -40>
+ %74 = add <4 x i32> %broadcast.splat19, <i32 -41, i32 -42, i32 -43, i32 -44>
+ %75 = add <4 x i32> %broadcast.splat21, <i32 -45, i32 -46, i32 -47, i32 -48>
%76 = icmp sgt <4 x i32> %72, <i32 -1, i32 -1, i32 -1, i32 -1>
%77 = icmp sgt <4 x i32> %73, <i32 -1, i32 -1, i32 -1, i32 -1>
%78 = icmp sgt <4 x i32> %74, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -138,10 +137,10 @@
%99 = getelementptr inbounds double, double* %93, i64 12
%100 = bitcast double* %99 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.2, <4 x i64>* %100, i32 8, <4 x i1> %79), !tbaa !12, !alias.scope !19, !noalias !16
- %101 = add <4 x i32> %broadcast.splat, <i32 -48, i32 -49, i32 -50, i32 -51>
- %102 = add <4 x i32> %broadcast.splat17, <i32 -52, i32 -53, i32 -54, i32 -55>
- %103 = add <4 x i32> %broadcast.splat19, <i32 -56, i32 -57, i32 -58, i32 -59>
- %104 = add <4 x i32> %broadcast.splat21, <i32 -60, i32 -61, i32 -62, i32 -63>
+ %101 = add <4 x i32> %broadcast.splat, <i32 -49, i32 -50, i32 -51, i32 -52>
+ %102 = add <4 x i32> %broadcast.splat17, <i32 -53, i32 -54, i32 -55, i32 -56>
+ %103 = add <4 x i32> %broadcast.splat19, <i32 -57, i32 -58, i32 -59, i32 -60>
+ %104 = add <4 x i32> %broadcast.splat21, <i32 -61, i32 -62, i32 -63, i32 -64>
%105 = icmp sgt <4 x i32> %101, <i32 -1, i32 -1, i32 -1, i32 -1>
%106 = icmp sgt <4 x i32> %102, <i32 -1, i32 -1, i32 -1, i32 -1>
%107 = icmp sgt <4 x i32> %103, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -175,10 +174,10 @@
%128 = getelementptr inbounds double, double* %122, i64 12
%129 = bitcast double* %128 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.3, <4 x i64>* %129, i32 8, <4 x i1> %108), !tbaa !12, !alias.scope !19, !noalias !16
- %130 = add <4 x i32> %broadcast.splat, <i32 -64, i32 -65, i32 -66, i32 -67>
- %131 = add <4 x i32> %broadcast.splat17, <i32 -68, i32 -69, i32 -70, i32 -71>
- %132 = add <4 x i32> %broadcast.splat19, <i32 -72, i32 -73, i32 -74, i32 -75>
- %133 = add <4 x i32> %broadcast.splat21, <i32 -76, i32 -77, i32 -78, i32 -79>
+ %130 = add <4 x i32> %broadcast.splat, <i32 -65, i32 -66, i32 -67, i32 -68>
+ %131 = add <4 x i32> %broadcast.splat17, <i32 -69, i32 -70, i32 -71, i32 -72>
+ %132 = add <4 x i32> %broadcast.splat19, <i32 -73, i32 -74, i32 -75, i32 -76>
+ %133 = add <4 x i32> %broadcast.splat21, <i32 -77, i32 -78, i32 -79, i32 -80>
%134 = icmp sgt <4 x i32> %130, <i32 -1, i32 -1, i32 -1, i32 -1>
%135 = icmp sgt <4 x i32> %131, <i32 -1, i32 -1, i32 -1, i32 -1>
%136 = icmp sgt <4 x i32> %132, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -212,10 +211,10 @@
%157 = getelementptr inbounds double, double* %151, i64 12
%158 = bitcast double* %157 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.4, <4 x i64>* %158, i32 8, <4 x i1> %137), !tbaa !12, !alias.scope !19, !noalias !16
- %159 = add <4 x i32> %broadcast.splat, <i32 -80, i32 -81, i32 -82, i32 -83>
- %160 = add <4 x i32> %broadcast.splat17, <i32 -84, i32 -85, i32 -86, i32 -87>
- %161 = add <4 x i32> %broadcast.splat19, <i32 -88, i32 -89, i32 -90, i32 -91>
- %162 = add <4 x i32> %broadcast.splat21, <i32 -92, i32 -93, i32 -94, i32 -95>
+ %159 = add <4 x i32> %broadcast.splat, <i32 -81, i32 -82, i32 -83, i32 -84>
+ %160 = add <4 x i32> %broadcast.splat17, <i32 -85, i32 -86, i32 -87, i32 -88>
+ %161 = add <4 x i32> %broadcast.splat19, <i32 -89, i32 -90, i32 -91, i32 -92>
+ %162 = add <4 x i32> %broadcast.splat21, <i32 -93, i32 -94, i32 -95, i32 -96>
%163 = icmp sgt <4 x i32> %159, <i32 -1, i32 -1, i32 -1, i32 -1>
%164 = icmp sgt <4 x i32> %160, <i32 -1, i32 -1, i32 -1, i32 -1>
%165 = icmp sgt <4 x i32> %161, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -249,10 +248,10 @@
%186 = getelementptr inbounds double, double* %180, i64 12
%187 = bitcast double* %186 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.5, <4 x i64>* %187, i32 8, <4 x i1> %166), !tbaa !12, !alias.scope !19, !noalias !16
- %188 = add <4 x i32> %broadcast.splat, <i32 -96, i32 -97, i32 -98, i32 -99>
- %189 = add <4 x i32> %broadcast.splat17, <i32 -100, i32 -101, i32 -102, i32 -103>
- %190 = add <4 x i32> %broadcast.splat19, <i32 -104, i32 -105, i32 -106, i32 -107>
- %191 = add <4 x i32> %broadcast.splat21, <i32 -108, i32 -109, i32 -110, i32 -111>
+ %188 = add <4 x i32> %broadcast.splat, <i32 -97, i32 -98, i32 -99, i32 -100>
+ %189 = add <4 x i32> %broadcast.splat17, <i32 -101, i32 -102, i32 -103, i32 -104>
+ %190 = add <4 x i32> %broadcast.splat19, <i32 -105, i32 -106, i32 -107, i32 -108>
+ %191 = add <4 x i32> %broadcast.splat21, <i32 -109, i32 -110, i32 -111, i32 -112>
%192 = icmp sgt <4 x i32> %188, <i32 -1, i32 -1, i32 -1, i32 -1>
%193 = icmp sgt <4 x i32> %189, <i32 -1, i32 -1, i32 -1, i32 -1>
%194 = icmp sgt <4 x i32> %190, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -286,10 +285,10 @@
%215 = getelementptr inbounds double, double* %209, i64 12
%216 = bitcast double* %215 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.6, <4 x i64>* %216, i32 8, <4 x i1> %195), !tbaa !12, !alias.scope !19, !noalias !16
- %217 = add <4 x i32> %broadcast.splat, <i32 -112, i32 -113, i32 -114, i32 -115>
- %218 = add <4 x i32> %broadcast.splat17, <i32 -116, i32 -117, i32 -118, i32 -119>
- %219 = add <4 x i32> %broadcast.splat19, <i32 -120, i32 -121, i32 -122, i32 -123>
- %220 = add <4 x i32> %broadcast.splat21, <i32 -124, i32 -125, i32 -126, i32 -127>
+ %217 = add <4 x i32> %broadcast.splat, <i32 -113, i32 -114, i32 -115, i32 -116>
+ %218 = add <4 x i32> %broadcast.splat17, <i32 -117, i32 -118, i32 -119, i32 -120>
+ %219 = add <4 x i32> %broadcast.splat19, <i32 -121, i32 -122, i32 -123, i32 -124>
+ %220 = add <4 x i32> %broadcast.splat21, <i32 -125, i32 -126, i32 -127, i32 -128>
%221 = icmp sgt <4 x i32> %217, <i32 -1, i32 -1, i32 -1, i32 -1>
%222 = icmp sgt <4 x i32> %218, <i32 -1, i32 -1, i32 -1, i32 -1>
%223 = icmp sgt <4 x i32> %219, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -325,10 +324,11 @@
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.7, <4 x i64>* %245, i32 8, <4 x i1> %224), !tbaa !12, !alias.scope !19, !noalias !16
br label %grudge_assign_0.exit
-pregion_for_entry..i: ; preds = %vector.scevcheck, %.r_exit.i.3
- %_local_id_x.0 = phi i64 [ %265, %.r_exit.i.3 ], [ 0, %vector.scevcheck ]
+pregion_for_entry..i: ; preds = %vector.scevcheck, %.r_exit.i.1
+ %_local_id_x.0 = phi i64 [ %255, %.r_exit.i.1 ], [ 0, %vector.scevcheck ]
%conv2.i = trunc i64 %_local_id_x.0 to i32
- %add5.i = sub i32 %add4.i, %conv2.i
+ %add.i = xor i32 %conv2.i, -1
+ %add5.i = add i32 %add4.i, %add.i
%cmp.i = icmp sgt i32 %add5.i, -1
br i1 %cmp.i, label %246, label %.r_exit.i
@@ -348,15 +348,16 @@
.r_exit.i: ; preds = %246, %pregion_for_entry..i
%250 = trunc i64 %_local_id_x.0 to i32
- %conv2.i.1 = or i32 %250, 1
- %add5.i.1 = sub i32 %add4.i, %conv2.i.1
+ %add.i.1 = xor i32 %250, -2
+ %add5.i.1 = add i32 %add4.i, %add.i.1
%cmp.i.1 = icmp sgt i32 %add5.i.1, -1
br i1 %cmp.i.1, label %251, label %.r_exit.i.1
-grudge_assign_0.exit: ; preds = %.r_exit.i.3, %vector.ph
+grudge_assign_0.exit: ; preds = %.r_exit.i.1, %vector.ph
ret void
; <label>:251: ; preds = %.r_exit.i
+ %conv2.i.1 = or i32 %250, 1
%add10.i.1 = add nuw nsw i32 %mul9.i, %conv2.i.1
%add13.i.1 = add i32 %add10.i.1, %4
%idxprom.i.1 = sext i32 %add13.i.1 to i64
@@ -371,51 +372,9 @@
br label %.r_exit.i.1
.r_exit.i.1: ; preds = %251, %.r_exit.i
- %255 = trunc i64 %_local_id_x.0 to i32
- %conv2.i.2 = or i32 %255, 2
- %add5.i.2 = sub i32 %add4.i, %conv2.i.2
- %cmp.i.2 = icmp sgt i32 %add5.i.2, -1
- br i1 %cmp.i.2, label %256, label %.r_exit.i.2
-
-; <label>:256: ; preds = %.r_exit.i.1
- %add10.i.2 = add nuw nsw i32 %mul9.i, %conv2.i.2
- %add13.i.2 = add i32 %add10.i.2, %4
- %idxprom.i.2 = sext i32 %add13.i.2 to i64
- %arrayidx.i.2 = getelementptr inbounds double, double* %3, i64 %idxprom.i.2
- %257 = bitcast double* %arrayidx.i.2 to i64*
- %258 = load i64, i64* %257, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21
- %add20.i.2 = add i32 %add10.i.2, %2
- %idxprom21.i.2 = sext i32 %add20.i.2 to i64
- %arrayidx22.i.2 = getelementptr inbounds double, double* %1, i64 %idxprom21.i.2
- %259 = bitcast double* %arrayidx22.i.2 to i64*
- store i64 %258, i64* %259, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21
- br label %.r_exit.i.2
-
-.r_exit.i.2: ; preds = %256, %.r_exit.i.1
- %260 = trunc i64 %_local_id_x.0 to i32
- %conv2.i.3 = or i32 %260, 3
- %add5.i.3 = sub i32 %add4.i, %conv2.i.3
- %cmp.i.3 = icmp sgt i32 %add5.i.3, -1
- br i1 %cmp.i.3, label %261, label %.r_exit.i.3
-
-; <label>:261: ; preds = %.r_exit.i.2
- %add10.i.3 = add nuw nsw i32 %mul9.i, %conv2.i.3
- %add13.i.3 = add i32 %add10.i.3, %4
- %idxprom.i.3 = sext i32 %add13.i.3 to i64
- %arrayidx.i.3 = getelementptr inbounds double, double* %3, i64 %idxprom.i.3
- %262 = bitcast double* %arrayidx.i.3 to i64*
- %263 = load i64, i64* %262, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21
- %add20.i.3 = add i32 %add10.i.3, %2
- %idxprom21.i.3 = sext i32 %add20.i.3 to i64
- %arrayidx22.i.3 = getelementptr inbounds double, double* %1, i64 %idxprom21.i.3
- %264 = bitcast double* %arrayidx22.i.3 to i64*
- store i64 %263, i64* %264, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21
- br label %.r_exit.i.3
-
-.r_exit.i.3: ; preds = %261, %.r_exit.i.2
- %265 = add nuw nsw i64 %_local_id_x.0, 4
- %exitcond.3 = icmp eq i64 %265, 128
- br i1 %exitcond.3, label %grudge_assign_0.exit, label %pregion_for_entry..i, !llvm.loop !23
+ %255 = add nuw nsw i64 %_local_id_x.0, 2
+ %exitcond.1 = icmp eq i64 %255, 128
+ br i1 %exitcond.1, label %grudge_assign_0.exit, label %pregion_for_entry..i, !llvm.loop !23
}
; Function Attrs: norecurse nounwind
@@ -441,15 +400,15 @@
%24 = load i32, i32* %23, align 4
%conv.i.i = trunc i64 %2 to i32
%mul.i.i = mul nsw i32 %conv.i.i, -128
- %add.i.i = add i32 %mul.i.i, -1
- %add4.i.i = add i32 %add.i.i, %8
+ %add4.i.i = add i32 %8, %mul.i.i
%mul9.i.i = shl nsw i32 %conv.i.i, 7
br label %pregion_for_entry..i.i
pregion_for_entry..i.i: ; preds = %.r_exit.i.i.1, %5
%_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %34, %.r_exit.i.i.1 ]
%conv2.i.i = trunc i64 %_local_id_x.i.0 to i32
- %add5.i.i = sub i32 %add4.i.i, %conv2.i.i
+ %add.i.i = xor i32 %conv2.i.i, -1
+ %add5.i.i = add i32 %add4.i.i, %add.i.i
%cmp.i.i = icmp sgt i32 %add5.i.i, -1
br i1 %cmp.i.i, label %25, label %.r_exit.i.i
@@ -469,8 +428,8 @@
.r_exit.i.i: ; preds = %25, %pregion_for_entry..i.i
%29 = trunc i64 %_local_id_x.i.0 to i32
- %conv2.i.i.1 = or i32 %29, 1
- %add5.i.i.1 = sub i32 %add4.i.i, %conv2.i.i.1
+ %add.i.i.1 = xor i32 %29, -2
+ %add5.i.i.1 = add i32 %add4.i.i, %add.i.i.1
%cmp.i.i.1 = icmp sgt i32 %add5.i.i.1, -1
br i1 %cmp.i.i.1, label %30, label %.r_exit.i.i.1
@@ -478,6 +437,7 @@
ret void
; <label>:30: ; preds = %.r_exit.i.i
+ %conv2.i.i.1 = or i32 %29, 1
%add10.i.i.1 = add nuw nsw i32 %mul9.i.i, %conv2.i.i.1
%add13.i.i.1 = add i32 %add10.i.i.1, %24
%idxprom.i.i.1 = sext i32 %add13.i.i.1 to i64
@@ -518,15 +478,15 @@
%22 = load i32, i32* %21, align 4
%conv.i.i = trunc i64 %2 to i32
%mul.i.i = mul nsw i32 %conv.i.i, -128
- %add.i.i = add i32 %mul.i.i, -1
- %add4.i.i = add i32 %add.i.i, %8
+ %add4.i.i = add i32 %8, %mul.i.i
%mul9.i.i = shl nsw i32 %conv.i.i, 7
br label %pregion_for_entry..i.i
pregion_for_entry..i.i: ; preds = %.r_exit.i.i.1, %5
%_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %32, %.r_exit.i.i.1 ]
%conv2.i.i = trunc i64 %_local_id_x.i.0 to i32
- %add5.i.i = sub i32 %add4.i.i, %conv2.i.i
+ %add.i.i = xor i32 %conv2.i.i, -1
+ %add5.i.i = add i32 %add4.i.i, %add.i.i
%cmp.i.i = icmp sgt i32 %add5.i.i, -1
br i1 %cmp.i.i, label %23, label %.r_exit.i.i
@@ -546,8 +506,8 @@
.r_exit.i.i: ; preds = %23, %pregion_for_entry..i.i
%27 = trunc i64 %_local_id_x.i.0 to i32
- %conv2.i.i.1 = or i32 %27, 1
- %add5.i.i.1 = sub i32 %add4.i.i, %conv2.i.i.1
+ %add.i.i.1 = xor i32 %27, -2
+ %add5.i.i.1 = add i32 %add4.i.i, %add.i.i.1
%cmp.i.i.1 = icmp sgt i32 %add5.i.i.1, -1
br i1 %cmp.i.i.1, label %28, label %.r_exit.i.i.1
@@ -555,6 +515,7 @@
ret void
; <label>:28: ; preds = %.r_exit.i.i
+ %conv2.i.i.1 = or i32 %27, 1
%add10.i.i.1 = add nuw nsw i32 %mul9.i.i, %conv2.i.i.1
%add13.i.i.1 = add i32 %add10.i.i.1, %22
%idxprom.i.i.1 = sext i32 %add13.i.i.1 to i64
; NOTE(review): this listing appears to be out1.ll, the optimized output for
; in1.ll -- its first lines match the '-' side of the out1.ll/out2.ll diff
; above, which is why the ModuleID names the input file.
; ModuleID = 'in1.ll'
source_filename = "parallel_bc"
; Data layout fields: e = little-endian, m:e = ELF name mangling,
; i64:64 = i64 aligned to 64 bits, f80:128 = x86 fp80 aligned to 128 bits,
; n8:16:32:64 = native integer widths, S128 = 128-bit natural stack alignment.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: alwaysinline norecurse nounwind
define void @_pocl_kernel_grudge_assign_0(i32, double* noalias nocapture, i32, double* noalias nocapture readonly, i32, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone, i64, i64, i64) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 !reqd_work_group_size !10 !pocl_generated !11 {
vector.scevcheck:
%conv.i = trunc i64 %6 to i32
%mul.i = mul nsw i32 %conv.i, -128
%add.i = add i32 %0, -1
%add4.i = add i32 %add.i, %mul.i
%mul9.i = shl nsw i32 %conv.i, 7
%9 = shl i32 %conv.i, 7
%10 = add i32 %9, %4
%11 = icmp sgt i32 %10, 2147483520
%12 = add i32 %9, %2
%13 = icmp sgt i32 %12, 2147483520
%14 = or i1 %11, %13
br i1 %14, label %pregion_for_entry..i, label %vector.ph
vector.ph: ; preds = %vector.scevcheck
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %add4.i, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert16 = insertelement <4 x i32> undef, i32 %add4.i, i32 0
%broadcast.splat17 = shufflevector <4 x i32> %broadcast.splatinsert16, <4 x i32> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert18 = insertelement <4 x i32> undef, i32 %add4.i, i32 0
%broadcast.splat19 = shufflevector <4 x i32> %broadcast.splatinsert18, <4 x i32> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert20 = insertelement <4 x i32> undef, i32 %add4.i, i32 0
%broadcast.splat21 = shufflevector <4 x i32> %broadcast.splatinsert20, <4 x i32> undef, <4 x i32> zeroinitializer
%15 = add <4 x i32> %broadcast.splat, <i32 0, i32 -1, i32 -2, i32 -3>
%16 = add <4 x i32> %broadcast.splat17, <i32 -4, i32 -5, i32 -6, i32 -7>
%17 = add <4 x i32> %broadcast.splat19, <i32 -8, i32 -9, i32 -10, i32 -11>
%18 = add <4 x i32> %broadcast.splat21, <i32 -12, i32 -13, i32 -14, i32 -15>
%19 = icmp sgt <4 x i32> %15, <i32 -1, i32 -1, i32 -1, i32 -1>
%20 = icmp sgt <4 x i32> %16, <i32 -1, i32 -1, i32 -1, i32 -1>
%21 = icmp sgt <4 x i32> %17, <i32 -1, i32 -1, i32 -1, i32 -1>
%22 = icmp sgt <4 x i32> %18, <i32 -1, i32 -1, i32 -1, i32 -1>
%23 = add i32 %mul9.i, %4
%24 = sext i32 %23 to i64
%25 = getelementptr inbounds double, double* %3, i64 %24
%26 = bitcast double* %25 to <4 x i64>*
%wide.load = load <4 x i64>, <4 x i64>* %26, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%27 = getelementptr inbounds double, double* %25, i64 4
%28 = bitcast double* %27 to <4 x i64>*
%wide.load22 = load <4 x i64>, <4 x i64>* %28, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%29 = getelementptr inbounds double, double* %25, i64 8
%30 = bitcast double* %29 to <4 x i64>*
%wide.load23 = load <4 x i64>, <4 x i64>* %30, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%31 = getelementptr inbounds double, double* %25, i64 12
%32 = bitcast double* %31 to <4 x i64>*
%wide.load24 = load <4 x i64>, <4 x i64>* %32, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%33 = add i32 %mul9.i, %2
%34 = sext i32 %33 to i64
%35 = getelementptr inbounds double, double* %1, i64 %34
%36 = bitcast double* %35 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load, <4 x i64>* %36, i32 8, <4 x i1> %19), !tbaa !12, !alias.scope !19, !noalias !16
%37 = getelementptr inbounds double, double* %35, i64 4
%38 = bitcast double* %37 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22, <4 x i64>* %38, i32 8, <4 x i1> %20), !tbaa !12, !alias.scope !19, !noalias !16
%39 = getelementptr inbounds double, double* %35, i64 8
%40 = bitcast double* %39 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23, <4 x i64>* %40, i32 8, <4 x i1> %21), !tbaa !12, !alias.scope !19, !noalias !16
%41 = getelementptr inbounds double, double* %35, i64 12
%42 = bitcast double* %41 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24, <4 x i64>* %42, i32 8, <4 x i1> %22), !tbaa !12, !alias.scope !19, !noalias !16
%43 = add <4 x i32> %broadcast.splat, <i32 -16, i32 -17, i32 -18, i32 -19>
%44 = add <4 x i32> %broadcast.splat17, <i32 -20, i32 -21, i32 -22, i32 -23>
%45 = add <4 x i32> %broadcast.splat19, <i32 -24, i32 -25, i32 -26, i32 -27>
%46 = add <4 x i32> %broadcast.splat21, <i32 -28, i32 -29, i32 -30, i32 -31>
%47 = icmp sgt <4 x i32> %43, <i32 -1, i32 -1, i32 -1, i32 -1>
%48 = icmp sgt <4 x i32> %44, <i32 -1, i32 -1, i32 -1, i32 -1>
%49 = icmp sgt <4 x i32> %45, <i32 -1, i32 -1, i32 -1, i32 -1>
%50 = icmp sgt <4 x i32> %46, <i32 -1, i32 -1, i32 -1, i32 -1>
%51 = or i32 %mul9.i, 16
%52 = add i32 %51, %4
%53 = sext i32 %52 to i64
%54 = getelementptr inbounds double, double* %3, i64 %53
%55 = bitcast double* %54 to <4 x i64>*
%wide.load.1 = load <4 x i64>, <4 x i64>* %55, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%56 = getelementptr inbounds double, double* %54, i64 4
%57 = bitcast double* %56 to <4 x i64>*
%wide.load22.1 = load <4 x i64>, <4 x i64>* %57, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%58 = getelementptr inbounds double, double* %54, i64 8
%59 = bitcast double* %58 to <4 x i64>*
%wide.load23.1 = load <4 x i64>, <4 x i64>* %59, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%60 = getelementptr inbounds double, double* %54, i64 12
%61 = bitcast double* %60 to <4 x i64>*
%wide.load24.1 = load <4 x i64>, <4 x i64>* %61, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%62 = add i32 %51, %2
%63 = sext i32 %62 to i64
%64 = getelementptr inbounds double, double* %1, i64 %63
%65 = bitcast double* %64 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.1, <4 x i64>* %65, i32 8, <4 x i1> %47), !tbaa !12, !alias.scope !19, !noalias !16
%66 = getelementptr inbounds double, double* %64, i64 4
%67 = bitcast double* %66 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.1, <4 x i64>* %67, i32 8, <4 x i1> %48), !tbaa !12, !alias.scope !19, !noalias !16
%68 = getelementptr inbounds double, double* %64, i64 8
%69 = bitcast double* %68 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.1, <4 x i64>* %69, i32 8, <4 x i1> %49), !tbaa !12, !alias.scope !19, !noalias !16
%70 = getelementptr inbounds double, double* %64, i64 12
%71 = bitcast double* %70 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.1, <4 x i64>* %71, i32 8, <4 x i1> %50), !tbaa !12, !alias.scope !19, !noalias !16
%72 = add <4 x i32> %broadcast.splat, <i32 -32, i32 -33, i32 -34, i32 -35>
%73 = add <4 x i32> %broadcast.splat17, <i32 -36, i32 -37, i32 -38, i32 -39>
%74 = add <4 x i32> %broadcast.splat19, <i32 -40, i32 -41, i32 -42, i32 -43>
%75 = add <4 x i32> %broadcast.splat21, <i32 -44, i32 -45, i32 -46, i32 -47>
%76 = icmp sgt <4 x i32> %72, <i32 -1, i32 -1, i32 -1, i32 -1>
%77 = icmp sgt <4 x i32> %73, <i32 -1, i32 -1, i32 -1, i32 -1>
%78 = icmp sgt <4 x i32> %74, <i32 -1, i32 -1, i32 -1, i32 -1>
%79 = icmp sgt <4 x i32> %75, <i32 -1, i32 -1, i32 -1, i32 -1>
%80 = or i32 %mul9.i, 32
%81 = add i32 %80, %4
%82 = sext i32 %81 to i64
%83 = getelementptr inbounds double, double* %3, i64 %82
%84 = bitcast double* %83 to <4 x i64>*
%wide.load.2 = load <4 x i64>, <4 x i64>* %84, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%85 = getelementptr inbounds double, double* %83, i64 4
%86 = bitcast double* %85 to <4 x i64>*
%wide.load22.2 = load <4 x i64>, <4 x i64>* %86, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%87 = getelementptr inbounds double, double* %83, i64 8
%88 = bitcast double* %87 to <4 x i64>*
%wide.load23.2 = load <4 x i64>, <4 x i64>* %88, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%89 = getelementptr inbounds double, double* %83, i64 12
%90 = bitcast double* %89 to <4 x i64>*
%wide.load24.2 = load <4 x i64>, <4 x i64>* %90, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%91 = add i32 %80, %2
%92 = sext i32 %91 to i64
%93 = getelementptr inbounds double, double* %1, i64 %92
%94 = bitcast double* %93 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.2, <4 x i64>* %94, i32 8, <4 x i1> %76), !tbaa !12, !alias.scope !19, !noalias !16
%95 = getelementptr inbounds double, double* %93, i64 4
%96 = bitcast double* %95 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.2, <4 x i64>* %96, i32 8, <4 x i1> %77), !tbaa !12, !alias.scope !19, !noalias !16
%97 = getelementptr inbounds double, double* %93, i64 8
%98 = bitcast double* %97 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.2, <4 x i64>* %98, i32 8, <4 x i1> %78), !tbaa !12, !alias.scope !19, !noalias !16
%99 = getelementptr inbounds double, double* %93, i64 12
%100 = bitcast double* %99 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.2, <4 x i64>* %100, i32 8, <4 x i1> %79), !tbaa !12, !alias.scope !19, !noalias !16
%101 = add <4 x i32> %broadcast.splat, <i32 -48, i32 -49, i32 -50, i32 -51>
%102 = add <4 x i32> %broadcast.splat17, <i32 -52, i32 -53, i32 -54, i32 -55>
%103 = add <4 x i32> %broadcast.splat19, <i32 -56, i32 -57, i32 -58, i32 -59>
%104 = add <4 x i32> %broadcast.splat21, <i32 -60, i32 -61, i32 -62, i32 -63>
%105 = icmp sgt <4 x i32> %101, <i32 -1, i32 -1, i32 -1, i32 -1>
%106 = icmp sgt <4 x i32> %102, <i32 -1, i32 -1, i32 -1, i32 -1>
%107 = icmp sgt <4 x i32> %103, <i32 -1, i32 -1, i32 -1, i32 -1>
%108 = icmp sgt <4 x i32> %104, <i32 -1, i32 -1, i32 -1, i32 -1>
%109 = or i32 %mul9.i, 48
%110 = add i32 %109, %4
%111 = sext i32 %110 to i64
%112 = getelementptr inbounds double, double* %3, i64 %111
%113 = bitcast double* %112 to <4 x i64>*
%wide.load.3 = load <4 x i64>, <4 x i64>* %113, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%114 = getelementptr inbounds double, double* %112, i64 4
%115 = bitcast double* %114 to <4 x i64>*
%wide.load22.3 = load <4 x i64>, <4 x i64>* %115, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%116 = getelementptr inbounds double, double* %112, i64 8
%117 = bitcast double* %116 to <4 x i64>*
%wide.load23.3 = load <4 x i64>, <4 x i64>* %117, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%118 = getelementptr inbounds double, double* %112, i64 12
%119 = bitcast double* %118 to <4 x i64>*
%wide.load24.3 = load <4 x i64>, <4 x i64>* %119, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%120 = add i32 %109, %2
%121 = sext i32 %120 to i64
%122 = getelementptr inbounds double, double* %1, i64 %121
%123 = bitcast double* %122 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.3, <4 x i64>* %123, i32 8, <4 x i1> %105), !tbaa !12, !alias.scope !19, !noalias !16
%124 = getelementptr inbounds double, double* %122, i64 4
%125 = bitcast double* %124 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.3, <4 x i64>* %125, i32 8, <4 x i1> %106), !tbaa !12, !alias.scope !19, !noalias !16
%126 = getelementptr inbounds double, double* %122, i64 8
%127 = bitcast double* %126 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.3, <4 x i64>* %127, i32 8, <4 x i1> %107), !tbaa !12, !alias.scope !19, !noalias !16
%128 = getelementptr inbounds double, double* %122, i64 12
%129 = bitcast double* %128 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.3, <4 x i64>* %129, i32 8, <4 x i1> %108), !tbaa !12, !alias.scope !19, !noalias !16
%130 = add <4 x i32> %broadcast.splat, <i32 -64, i32 -65, i32 -66, i32 -67>
%131 = add <4 x i32> %broadcast.splat17, <i32 -68, i32 -69, i32 -70, i32 -71>
%132 = add <4 x i32> %broadcast.splat19, <i32 -72, i32 -73, i32 -74, i32 -75>
%133 = add <4 x i32> %broadcast.splat21, <i32 -76, i32 -77, i32 -78, i32 -79>
%134 = icmp sgt <4 x i32> %130, <i32 -1, i32 -1, i32 -1, i32 -1>
%135 = icmp sgt <4 x i32> %131, <i32 -1, i32 -1, i32 -1, i32 -1>
%136 = icmp sgt <4 x i32> %132, <i32 -1, i32 -1, i32 -1, i32 -1>
%137 = icmp sgt <4 x i32> %133, <i32 -1, i32 -1, i32 -1, i32 -1>
%138 = or i32 %mul9.i, 64
%139 = add i32 %138, %4
%140 = sext i32 %139 to i64
%141 = getelementptr inbounds double, double* %3, i64 %140
%142 = bitcast double* %141 to <4 x i64>*
%wide.load.4 = load <4 x i64>, <4 x i64>* %142, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%143 = getelementptr inbounds double, double* %141, i64 4
%144 = bitcast double* %143 to <4 x i64>*
%wide.load22.4 = load <4 x i64>, <4 x i64>* %144, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%145 = getelementptr inbounds double, double* %141, i64 8
%146 = bitcast double* %145 to <4 x i64>*
%wide.load23.4 = load <4 x i64>, <4 x i64>* %146, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%147 = getelementptr inbounds double, double* %141, i64 12
%148 = bitcast double* %147 to <4 x i64>*
%wide.load24.4 = load <4 x i64>, <4 x i64>* %148, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%149 = add i32 %138, %2
%150 = sext i32 %149 to i64
%151 = getelementptr inbounds double, double* %1, i64 %150
%152 = bitcast double* %151 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.4, <4 x i64>* %152, i32 8, <4 x i1> %134), !tbaa !12, !alias.scope !19, !noalias !16
%153 = getelementptr inbounds double, double* %151, i64 4
%154 = bitcast double* %153 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.4, <4 x i64>* %154, i32 8, <4 x i1> %135), !tbaa !12, !alias.scope !19, !noalias !16
%155 = getelementptr inbounds double, double* %151, i64 8
%156 = bitcast double* %155 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.4, <4 x i64>* %156, i32 8, <4 x i1> %136), !tbaa !12, !alias.scope !19, !noalias !16
%157 = getelementptr inbounds double, double* %151, i64 12
%158 = bitcast double* %157 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.4, <4 x i64>* %158, i32 8, <4 x i1> %137), !tbaa !12, !alias.scope !19, !noalias !16
%159 = add <4 x i32> %broadcast.splat, <i32 -80, i32 -81, i32 -82, i32 -83>
%160 = add <4 x i32> %broadcast.splat17, <i32 -84, i32 -85, i32 -86, i32 -87>
%161 = add <4 x i32> %broadcast.splat19, <i32 -88, i32 -89, i32 -90, i32 -91>
%162 = add <4 x i32> %broadcast.splat21, <i32 -92, i32 -93, i32 -94, i32 -95>
%163 = icmp sgt <4 x i32> %159, <i32 -1, i32 -1, i32 -1, i32 -1>
%164 = icmp sgt <4 x i32> %160, <i32 -1, i32 -1, i32 -1, i32 -1>
%165 = icmp sgt <4 x i32> %161, <i32 -1, i32 -1, i32 -1, i32 -1>
%166 = icmp sgt <4 x i32> %162, <i32 -1, i32 -1, i32 -1, i32 -1>
%167 = or i32 %mul9.i, 80
%168 = add i32 %167, %4
%169 = sext i32 %168 to i64
%170 = getelementptr inbounds double, double* %3, i64 %169
%171 = bitcast double* %170 to <4 x i64>*
%wide.load.5 = load <4 x i64>, <4 x i64>* %171, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%172 = getelementptr inbounds double, double* %170, i64 4
%173 = bitcast double* %172 to <4 x i64>*
%wide.load22.5 = load <4 x i64>, <4 x i64>* %173, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%174 = getelementptr inbounds double, double* %170, i64 8
%175 = bitcast double* %174 to <4 x i64>*
%wide.load23.5 = load <4 x i64>, <4 x i64>* %175, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%176 = getelementptr inbounds double, double* %170, i64 12
%177 = bitcast double* %176 to <4 x i64>*
%wide.load24.5 = load <4 x i64>, <4 x i64>* %177, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%178 = add i32 %167, %2
%179 = sext i32 %178 to i64
%180 = getelementptr inbounds double, double* %1, i64 %179
%181 = bitcast double* %180 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.5, <4 x i64>* %181, i32 8, <4 x i1> %163), !tbaa !12, !alias.scope !19, !noalias !16
%182 = getelementptr inbounds double, double* %180, i64 4
%183 = bitcast double* %182 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.5, <4 x i64>* %183, i32 8, <4 x i1> %164), !tbaa !12, !alias.scope !19, !noalias !16
%184 = getelementptr inbounds double, double* %180, i64 8
%185 = bitcast double* %184 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.5, <4 x i64>* %185, i32 8, <4 x i1> %165), !tbaa !12, !alias.scope !19, !noalias !16
%186 = getelementptr inbounds double, double* %180, i64 12
%187 = bitcast double* %186 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.5, <4 x i64>* %187, i32 8, <4 x i1> %166), !tbaa !12, !alias.scope !19, !noalias !16
%188 = add <4 x i32> %broadcast.splat, <i32 -96, i32 -97, i32 -98, i32 -99>
%189 = add <4 x i32> %broadcast.splat17, <i32 -100, i32 -101, i32 -102, i32 -103>
%190 = add <4 x i32> %broadcast.splat19, <i32 -104, i32 -105, i32 -106, i32 -107>
%191 = add <4 x i32> %broadcast.splat21, <i32 -108, i32 -109, i32 -110, i32 -111>
%192 = icmp sgt <4 x i32> %188, <i32 -1, i32 -1, i32 -1, i32 -1>
%193 = icmp sgt <4 x i32> %189, <i32 -1, i32 -1, i32 -1, i32 -1>
%194 = icmp sgt <4 x i32> %190, <i32 -1, i32 -1, i32 -1, i32 -1>
%195 = icmp sgt <4 x i32> %191, <i32 -1, i32 -1, i32 -1, i32 -1>
%196 = or i32 %mul9.i, 96
%197 = add i32 %196, %4
%198 = sext i32 %197 to i64
%199 = getelementptr inbounds double, double* %3, i64 %198
%200 = bitcast double* %199 to <4 x i64>*
%wide.load.6 = load <4 x i64>, <4 x i64>* %200, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%201 = getelementptr inbounds double, double* %199, i64 4
%202 = bitcast double* %201 to <4 x i64>*
%wide.load22.6 = load <4 x i64>, <4 x i64>* %202, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%203 = getelementptr inbounds double, double* %199, i64 8
%204 = bitcast double* %203 to <4 x i64>*
%wide.load23.6 = load <4 x i64>, <4 x i64>* %204, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%205 = getelementptr inbounds double, double* %199, i64 12
%206 = bitcast double* %205 to <4 x i64>*
%wide.load24.6 = load <4 x i64>, <4 x i64>* %206, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%207 = add i32 %196, %2
%208 = sext i32 %207 to i64
%209 = getelementptr inbounds double, double* %1, i64 %208
%210 = bitcast double* %209 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.6, <4 x i64>* %210, i32 8, <4 x i1> %192), !tbaa !12, !alias.scope !19, !noalias !16
%211 = getelementptr inbounds double, double* %209, i64 4
%212 = bitcast double* %211 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.6, <4 x i64>* %212, i32 8, <4 x i1> %193), !tbaa !12, !alias.scope !19, !noalias !16
%213 = getelementptr inbounds double, double* %209, i64 8
%214 = bitcast double* %213 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.6, <4 x i64>* %214, i32 8, <4 x i1> %194), !tbaa !12, !alias.scope !19, !noalias !16
%215 = getelementptr inbounds double, double* %209, i64 12
%216 = bitcast double* %215 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.6, <4 x i64>* %216, i32 8, <4 x i1> %195), !tbaa !12, !alias.scope !19, !noalias !16
%217 = add <4 x i32> %broadcast.splat, <i32 -112, i32 -113, i32 -114, i32 -115>
%218 = add <4 x i32> %broadcast.splat17, <i32 -116, i32 -117, i32 -118, i32 -119>
%219 = add <4 x i32> %broadcast.splat19, <i32 -120, i32 -121, i32 -122, i32 -123>
%220 = add <4 x i32> %broadcast.splat21, <i32 -124, i32 -125, i32 -126, i32 -127>
%221 = icmp sgt <4 x i32> %217, <i32 -1, i32 -1, i32 -1, i32 -1>
%222 = icmp sgt <4 x i32> %218, <i32 -1, i32 -1, i32 -1, i32 -1>
%223 = icmp sgt <4 x i32> %219, <i32 -1, i32 -1, i32 -1, i32 -1>
%224 = icmp sgt <4 x i32> %220, <i32 -1, i32 -1, i32 -1, i32 -1>
%225 = or i32 %mul9.i, 112
%226 = add i32 %225, %4
%227 = sext i32 %226 to i64
%228 = getelementptr inbounds double, double* %3, i64 %227
%229 = bitcast double* %228 to <4 x i64>*
%wide.load.7 = load <4 x i64>, <4 x i64>* %229, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%230 = getelementptr inbounds double, double* %228, i64 4
%231 = bitcast double* %230 to <4 x i64>*
%wide.load22.7 = load <4 x i64>, <4 x i64>* %231, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%232 = getelementptr inbounds double, double* %228, i64 8
%233 = bitcast double* %232 to <4 x i64>*
%wide.load23.7 = load <4 x i64>, <4 x i64>* %233, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%234 = getelementptr inbounds double, double* %228, i64 12
%235 = bitcast double* %234 to <4 x i64>*
%wide.load24.7 = load <4 x i64>, <4 x i64>* %235, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%236 = add i32 %225, %2
%237 = sext i32 %236 to i64
%238 = getelementptr inbounds double, double* %1, i64 %237
%239 = bitcast double* %238 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.7, <4 x i64>* %239, i32 8, <4 x i1> %221), !tbaa !12, !alias.scope !19, !noalias !16
%240 = getelementptr inbounds double, double* %238, i64 4
%241 = bitcast double* %240 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.7, <4 x i64>* %241, i32 8, <4 x i1> %222), !tbaa !12, !alias.scope !19, !noalias !16
%242 = getelementptr inbounds double, double* %238, i64 8
%243 = bitcast double* %242 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.7, <4 x i64>* %243, i32 8, <4 x i1> %223), !tbaa !12, !alias.scope !19, !noalias !16
%244 = getelementptr inbounds double, double* %238, i64 12
%245 = bitcast double* %244 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.7, <4 x i64>* %245, i32 8, <4 x i1> %224), !tbaa !12, !alias.scope !19, !noalias !16
br label %grudge_assign_0.exit
pregion_for_entry..i: ; preds = %vector.scevcheck, %.r_exit.i.3
%_local_id_x.0 = phi i64 [ %265, %.r_exit.i.3 ], [ 0, %vector.scevcheck ]
%conv2.i = trunc i64 %_local_id_x.0 to i32
%add5.i = sub i32 %add4.i, %conv2.i
%cmp.i = icmp sgt i32 %add5.i, -1
br i1 %cmp.i, label %246, label %.r_exit.i
; <label>:246: ; preds = %pregion_for_entry..i
%add10.i = add nuw nsw i32 %mul9.i, %conv2.i
%add13.i = add i32 %add10.i, %4
%idxprom.i = sext i32 %add13.i to i64
%arrayidx.i = getelementptr inbounds double, double* %3, i64 %idxprom.i
%247 = bitcast double* %arrayidx.i to i64*
%248 = load i64, i64* %247, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21
%add20.i = add i32 %add10.i, %2
%idxprom21.i = sext i32 %add20.i to i64
%arrayidx22.i = getelementptr inbounds double, double* %1, i64 %idxprom21.i
%249 = bitcast double* %arrayidx22.i to i64*
store i64 %248, i64* %249, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i
.r_exit.i: ; preds = %246, %pregion_for_entry..i
%250 = trunc i64 %_local_id_x.0 to i32
%conv2.i.1 = or i32 %250, 1
%add5.i.1 = sub i32 %add4.i, %conv2.i.1
%cmp.i.1 = icmp sgt i32 %add5.i.1, -1
br i1 %cmp.i.1, label %251, label %.r_exit.i.1
grudge_assign_0.exit: ; preds = %.r_exit.i.3, %vector.ph
ret void
; <label>:251: ; preds = %.r_exit.i
%add10.i.1 = add nuw nsw i32 %mul9.i, %conv2.i.1
%add13.i.1 = add i32 %add10.i.1, %4
%idxprom.i.1 = sext i32 %add13.i.1 to i64
%arrayidx.i.1 = getelementptr inbounds double, double* %3, i64 %idxprom.i.1
%252 = bitcast double* %arrayidx.i.1 to i64*
%253 = load i64, i64* %252, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21
%add20.i.1 = add i32 %add10.i.1, %2
%idxprom21.i.1 = sext i32 %add20.i.1 to i64
%arrayidx22.i.1 = getelementptr inbounds double, double* %1, i64 %idxprom21.i.1
%254 = bitcast double* %arrayidx22.i.1 to i64*
store i64 %253, i64* %254, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i.1
.r_exit.i.1: ; preds = %251, %.r_exit.i
%255 = trunc i64 %_local_id_x.0 to i32
%conv2.i.2 = or i32 %255, 2
%add5.i.2 = sub i32 %add4.i, %conv2.i.2
%cmp.i.2 = icmp sgt i32 %add5.i.2, -1
br i1 %cmp.i.2, label %256, label %.r_exit.i.2
; <label>:256: ; preds = %.r_exit.i.1
%add10.i.2 = add nuw nsw i32 %mul9.i, %conv2.i.2
%add13.i.2 = add i32 %add10.i.2, %4
%idxprom.i.2 = sext i32 %add13.i.2 to i64
%arrayidx.i.2 = getelementptr inbounds double, double* %3, i64 %idxprom.i.2
%257 = bitcast double* %arrayidx.i.2 to i64*
%258 = load i64, i64* %257, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21
%add20.i.2 = add i32 %add10.i.2, %2
%idxprom21.i.2 = sext i32 %add20.i.2 to i64
%arrayidx22.i.2 = getelementptr inbounds double, double* %1, i64 %idxprom21.i.2
%259 = bitcast double* %arrayidx22.i.2 to i64*
store i64 %258, i64* %259, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i.2
.r_exit.i.2: ; preds = %256, %.r_exit.i.1
%260 = trunc i64 %_local_id_x.0 to i32
%conv2.i.3 = or i32 %260, 3
%add5.i.3 = sub i32 %add4.i, %conv2.i.3
%cmp.i.3 = icmp sgt i32 %add5.i.3, -1
br i1 %cmp.i.3, label %261, label %.r_exit.i.3
; <label>:261: ; preds = %.r_exit.i.2
%add10.i.3 = add nuw nsw i32 %mul9.i, %conv2.i.3
%add13.i.3 = add i32 %add10.i.3, %4
%idxprom.i.3 = sext i32 %add13.i.3 to i64
%arrayidx.i.3 = getelementptr inbounds double, double* %3, i64 %idxprom.i.3
%262 = bitcast double* %arrayidx.i.3 to i64*
%263 = load i64, i64* %262, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21
%add20.i.3 = add i32 %add10.i.3, %2
%idxprom21.i.3 = sext i32 %add20.i.3 to i64
%arrayidx22.i.3 = getelementptr inbounds double, double* %1, i64 %idxprom21.i.3
%264 = bitcast double* %arrayidx22.i.3 to i64*
store i64 %263, i64* %264, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i.3
.r_exit.i.3: ; preds = %261, %.r_exit.i.2
%265 = add nuw nsw i64 %_local_id_x.0, 4
%exitcond.3 = icmp eq i64 %265, 128
br i1 %exitcond.3, label %grudge_assign_0.exit, label %pregion_for_entry..i, !llvm.loop !23
}
; Function Attrs: norecurse nounwind
;
; @_pocl_kernel_grudge_assign_0_workgroup
;
; Performs 128 iterations of a bounds-guarded element copy in a single call.
; With n = value behind slot 0 of %0 and base = 128 * trunc(%2), for every
; i in [0, 128):
;     if (n - 1 - base - i > -1)          ; signed i32 compare, wrapping math
;         dst[dst_off + base + i] = src[src_off + base + i]
; Each element is moved as a raw i64 through pointers typed double*.
;
; Parameters (all unnamed):
;   %0      array of i8* argument slots; slots 0..4 are read below
;   %1      pointer to a context struct; never referenced (marked readnone)
;   %2      i64, truncated to i32 and scaled by 128 to form `base`
;           (presumably the work-group id in x -- TODO confirm with caller)
;   %3, %4  i64, never referenced in this body
;
; The `.i.i` value suffixes and the alias-scope names on the memory accesses
; suggest the kernel body was inlined into this function (not verifiable
; from this function alone).
;
; The loop is unrolled by two: the counter steps 0, 2, 4, ... and each trip
; handles the elements `counter` and `counter | 1`.
define void @_pocl_kernel_grudge_assign_0_workgroup(i8** nocapture readonly, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone, i64, i64, i64) local_unnamed_addr #1 {
; --- Implicit entry block %5: unpack the five argument slots --------------
; Slot 0: pointer to an i32 -> %8 (the element-count bound n).
%6 = bitcast i8** %0 to i32**
%7 = load i32*, i32** %6, align 8
%8 = load i32, i32* %7, align 4
; Slot 1: pointer to a double* -> %12 (destination base; two loads deep).
%9 = getelementptr i8*, i8** %0, i64 1
%10 = bitcast i8** %9 to double***
%11 = load double**, double*** %10, align 8
%12 = load double*, double** %11, align 8
; Slot 2: pointer to an i32 -> %16 (offset added to the destination index).
%13 = getelementptr i8*, i8** %0, i64 2
%14 = bitcast i8** %13 to i32**
%15 = load i32*, i32** %14, align 8
%16 = load i32, i32* %15, align 4
; Slot 3: pointer to a double* -> %20 (source base; two loads deep).
%17 = getelementptr i8*, i8** %0, i64 3
%18 = bitcast i8** %17 to double***
%19 = load double**, double*** %18, align 8
%20 = load double*, double** %19, align 8
; Slot 4: pointer to an i32 -> %24 (offset added to the source index).
%21 = getelementptr i8*, i8** %0, i64 4
%22 = bitcast i8** %21 to i32**
%23 = load i32*, i32** %22, align 8
%24 = load i32, i32* %23, align 4
; Loop-invariant terms:
;   %add4.i.i = n - 1 - 128 * trunc(%2)   (left side of the guard, minus i)
;   %mul9.i.i = 128 * trunc(%2)           (base index, computed as shl by 7)
%conv.i.i = trunc i64 %2 to i32
%mul.i.i = mul nsw i32 %conv.i.i, -128
%add.i.i = add i32 %mul.i.i, -1
%add4.i.i = add i32 %add.i.i, %8
%mul9.i.i = shl nsw i32 %conv.i.i, 7
br label %pregion_for_entry..i.i
; --- Loop header: first (even) element of the unrolled pair ---------------
pregion_for_entry..i.i: ; preds = %.r_exit.i.i.1, %5
; Loop counter: 0 on entry, previous value + 2 on the back edge.
%_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %34, %.r_exit.i.i.1 ]
%conv2.i.i = trunc i64 %_local_id_x.i.0 to i32
; Guard: copy only when (n - 1 - base - i) > -1, i.e. is non-negative.
%add5.i.i = sub i32 %add4.i.i, %conv2.i.i
%cmp.i.i = icmp sgt i32 %add5.i.i, -1
br i1 %cmp.i.i, label %25, label %.r_exit.i.i
; Guarded copy for element i: dst[base + i + %16] = src[base + i + %24].
; <label>:25: ; preds = %pregion_for_entry..i.i
%add10.i.i = add nuw nsw i32 %mul9.i.i, %conv2.i.i
%add13.i.i = add i32 %add10.i.i, %24
%idxprom.i.i = sext i32 %add13.i.i to i64
%arrayidx.i.i = getelementptr inbounds double, double* %20, i64 %idxprom.i.i
; The double is loaded and stored as an i64 bit pattern.
%26 = bitcast double* %arrayidx.i.i to i64*
%27 = load i64, i64* %26, align 8, !tbaa !12, !alias.scope !25, !noalias !30, !llvm.mem.parallel_loop_access !21
%add20.i.i = add i32 %add10.i.i, %16
%idxprom21.i.i = sext i32 %add20.i.i to i64
%arrayidx22.i.i = getelementptr inbounds double, double* %12, i64 %idxprom21.i.i
%28 = bitcast double* %arrayidx22.i.i to i64*
store i64 %27, i64* %28, align 8, !tbaa !12, !alias.scope !30, !noalias !25, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i.i
; --- Second (odd) element of the unrolled pair ----------------------------
.r_exit.i.i: ; preds = %25, %pregion_for_entry..i.i
; The counter starts at 0 and advances by 2, so it is always even and
; `or ..., 1` yields counter + 1.
%29 = trunc i64 %_local_id_x.i.0 to i32
%conv2.i.i.1 = or i32 %29, 1
%add5.i.i.1 = sub i32 %add4.i.i, %conv2.i.i.1
%cmp.i.i.1 = icmp sgt i32 %add5.i.i.1, -1
br i1 %cmp.i.i.1, label %30, label %.r_exit.i.i.1
; Function exit; it sits mid-function in the text but is reached only from
; the loop latch once the counter equals 128.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i.1
ret void
; Guarded copy for element i + 1 (same address arithmetic as block %25).
; <label>:30: ; preds = %.r_exit.i.i
%add10.i.i.1 = add nuw nsw i32 %mul9.i.i, %conv2.i.i.1
%add13.i.i.1 = add i32 %add10.i.i.1, %24
%idxprom.i.i.1 = sext i32 %add13.i.i.1 to i64
%arrayidx.i.i.1 = getelementptr inbounds double, double* %20, i64 %idxprom.i.i.1
%31 = bitcast double* %arrayidx.i.i.1 to i64*
%32 = load i64, i64* %31, align 8, !tbaa !12, !alias.scope !25, !noalias !30, !llvm.mem.parallel_loop_access !21
%add20.i.i.1 = add i32 %add10.i.i.1, %16
%idxprom21.i.i.1 = sext i32 %add20.i.i.1 to i64
%arrayidx22.i.i.1 = getelementptr inbounds double, double* %12, i64 %idxprom21.i.i.1
%33 = bitcast double* %arrayidx22.i.i.1 to i64*
store i64 %32, i64* %33, align 8, !tbaa !12, !alias.scope !30, !noalias !25, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i.i.1
; --- Loop latch: advance by 2, leave once the counter reaches 128 ---------
.r_exit.i.i.1: ; preds = %30, %.r_exit.i.i
%34 = add nuw nsw i64 %_local_id_x.i.0, 2
%exitcond.1 = icmp eq i64 %34, 128
br i1 %exitcond.1, label %_pocl_kernel_grudge_assign_0.exit, label %pregion_for_entry..i.i, !llvm.loop !22
}
; Function Attrs: norecurse nounwind
;
; @_pocl_kernel_grudge_assign_0_workgroup_fast
;
; Performs 128 iterations of a bounds-guarded element copy in a single call.
; With n = value behind slot 0 of %0 and base = 128 * trunc(%2), for every
; i in [0, 128):
;     if (n - 1 - base - i > -1)          ; signed i32 compare, wrapping math
;         dst[dst_off + base + i] = src[src_off + base + i]
; Each element is moved as a raw i64 through pointers typed double*.
;
; Difference from @_pocl_kernel_grudge_assign_0_workgroup: argument slots 1
; and 3 are read here as double* directly (one load each) instead of being
; dereferenced through an extra pointer level.
;
; Parameters (all unnamed):
;   %0      array of i8* argument slots; slots 0..4 are read below
;   %1      pointer to a context struct; never referenced (marked readnone)
;   %2      i64, truncated to i32 and scaled by 128 to form `base`
;           (presumably the work-group id in x -- TODO confirm with caller)
;   %3, %4  i64, never referenced in this body
;
; The loop is unrolled by two: the counter steps 0, 2, 4, ... and each trip
; handles the elements `counter` and `counter | 1`.
define void @_pocl_kernel_grudge_assign_0_workgroup_fast(i8** nocapture readonly, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone, i64, i64, i64) local_unnamed_addr #1 {
; --- Implicit entry block %5: unpack the five argument slots --------------
; Slot 0: pointer to an i32 -> %8 (the element-count bound n).
%6 = bitcast i8** %0 to i32**
%7 = load i32*, i32** %6, align 8
%8 = load i32, i32* %7, align 4
; Slot 1: holds the destination base pointer itself -> %11.
%9 = getelementptr i8*, i8** %0, i64 1
%10 = bitcast i8** %9 to double**
%11 = load double*, double** %10, align 8
; Slot 2: pointer to an i32 -> %15 (offset added to the destination index).
%12 = getelementptr i8*, i8** %0, i64 2
%13 = bitcast i8** %12 to i32**
%14 = load i32*, i32** %13, align 8
%15 = load i32, i32* %14, align 4
; Slot 3: holds the source base pointer itself -> %18.
%16 = getelementptr i8*, i8** %0, i64 3
%17 = bitcast i8** %16 to double**
%18 = load double*, double** %17, align 8
; Slot 4: pointer to an i32 -> %22 (offset added to the source index).
%19 = getelementptr i8*, i8** %0, i64 4
%20 = bitcast i8** %19 to i32**
%21 = load i32*, i32** %20, align 8
%22 = load i32, i32* %21, align 4
; Loop-invariant terms:
;   %add4.i.i = n - 1 - 128 * trunc(%2)   (left side of the guard, minus i)
;   %mul9.i.i = 128 * trunc(%2)           (base index, computed as shl by 7)
%conv.i.i = trunc i64 %2 to i32
%mul.i.i = mul nsw i32 %conv.i.i, -128
%add.i.i = add i32 %mul.i.i, -1
%add4.i.i = add i32 %add.i.i, %8
%mul9.i.i = shl nsw i32 %conv.i.i, 7
br label %pregion_for_entry..i.i
; --- Loop header: first (even) element of the unrolled pair ---------------
pregion_for_entry..i.i: ; preds = %.r_exit.i.i.1, %5
; Loop counter: 0 on entry, previous value + 2 on the back edge.
%_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %32, %.r_exit.i.i.1 ]
%conv2.i.i = trunc i64 %_local_id_x.i.0 to i32
; Guard: copy only when (n - 1 - base - i) > -1, i.e. is non-negative.
%add5.i.i = sub i32 %add4.i.i, %conv2.i.i
%cmp.i.i = icmp sgt i32 %add5.i.i, -1
br i1 %cmp.i.i, label %23, label %.r_exit.i.i
; Guarded copy for element i: dst[base + i + %15] = src[base + i + %22].
; <label>:23: ; preds = %pregion_for_entry..i.i
%add10.i.i = add nuw nsw i32 %mul9.i.i, %conv2.i.i
%add13.i.i = add i32 %add10.i.i, %22
%idxprom.i.i = sext i32 %add13.i.i to i64
%arrayidx.i.i = getelementptr inbounds double, double* %18, i64 %idxprom.i.i
; The double is loaded and stored as an i64 bit pattern.
%24 = bitcast double* %arrayidx.i.i to i64*
%25 = load i64, i64* %24, align 8, !tbaa !12, !alias.scope !33, !noalias !38, !llvm.mem.parallel_loop_access !21
%add20.i.i = add i32 %add10.i.i, %15
%idxprom21.i.i = sext i32 %add20.i.i to i64
%arrayidx22.i.i = getelementptr inbounds double, double* %11, i64 %idxprom21.i.i
%26 = bitcast double* %arrayidx22.i.i to i64*
store i64 %25, i64* %26, align 8, !tbaa !12, !alias.scope !38, !noalias !33, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i.i
; --- Second (odd) element of the unrolled pair ----------------------------
.r_exit.i.i: ; preds = %23, %pregion_for_entry..i.i
; The counter starts at 0 and advances by 2, so it is always even and
; `or ..., 1` yields counter + 1.
%27 = trunc i64 %_local_id_x.i.0 to i32
%conv2.i.i.1 = or i32 %27, 1
%add5.i.i.1 = sub i32 %add4.i.i, %conv2.i.i.1
%cmp.i.i.1 = icmp sgt i32 %add5.i.i.1, -1
br i1 %cmp.i.i.1, label %28, label %.r_exit.i.i.1
; Function exit; it sits mid-function in the text but is reached only from
; the loop latch once the counter equals 128.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i.1
ret void
; Guarded copy for element i + 1 (same address arithmetic as block %23).
; <label>:28: ; preds = %.r_exit.i.i
%add10.i.i.1 = add nuw nsw i32 %mul9.i.i, %conv2.i.i.1
%add13.i.i.1 = add i32 %add10.i.i.1, %22
%idxprom.i.i.1 = sext i32 %add13.i.i.1 to i64
%arrayidx.i.i.1 = getelementptr inbounds double, double* %18, i64 %idxprom.i.i.1
%29 = bitcast double* %arrayidx.i.i.1 to i64*
%30 = load i64, i64* %29, align 8, !tbaa !12, !alias.scope !33, !noalias !38, !llvm.mem.parallel_loop_access !21
%add20.i.i.1 = add i32 %add10.i.i.1, %15
%idxprom21.i.i.1 = sext i32 %add20.i.i.1 to i64
%arrayidx22.i.i.1 = getelementptr inbounds double, double* %11, i64 %idxprom21.i.i.1
%31 = bitcast double* %arrayidx22.i.i.1 to i64*
store i64 %30, i64* %31, align 8, !tbaa !12, !alias.scope !38, !noalias !33, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i.i.1
; --- Loop latch: advance by 2, leave once the counter reaches 128 ---------
.r_exit.i.i.1: ; preds = %28, %.r_exit.i.i
%32 = add nuw nsw i64 %_local_id_x.i.0, 2
%exitcond.1 = icmp eq i64 %32, 128
br i1 %exitcond.1, label %_pocl_kernel_grudge_assign_0.exit, label %pregion_for_entry..i.i, !llvm.loop !22
}
; Function Attrs: argmemonly nounwind
; Masked vector store intrinsic for <4 x i64>. Per the LLVM Language
; Reference the operands are (value, pointer, alignment, per-lane mask);
; the calls in this module pass `i32 8` as the alignment.
declare void @llvm.masked.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, i32, <4 x i1>) #2
; Attribute groups.
;   #0  alwaysinline set with target-cpu "haswell" and an explicit feature
;       list (presumably attached to the kernel definition earlier in this
;       module -- TODO confirm)
;   #1  used by the two *_workgroup functions
;   #2  used by the masked-store declaration above
attributes #0 = { alwaysinline norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind }
attributes #2 = { argmemonly nounwind }
; Named module-level metadata.
!llvm.module.flags = !{!0, !1, !2}
!opencl.ocl.version = !{!3}
!llvm.ident = !{!4}
!opencl.spir.version = !{!3}
; !0-!2: module flags.  !3: version pair shared by the two opencl.* nodes.
; !4: producer identification string.
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 1, i32 2}
!4 = !{!"clang version 6.0.1 (tags/RELEASE_601/final)"}
; !5-!11: five-entry lists plus two small nodes. These are presumably the
; kernel_arg_* / reqd_work_group_size / pocl_generated attachments of the
; kernel definition, whose `define` line is earlier in this module -- TODO
; confirm there.
!5 = !{i32 0, i32 1, i32 0, i32 1, i32 0}
!6 = !{!"none", !"none", !"none", !"none", !"none"}
!7 = !{!"int", !"double*", !"int", !"double*", !"int"}
!8 = !{!"", !"restrict", !"", !"restrict const", !""}
!9 = !{!"grdg_n", !"expr_8", !"expr_8_offset", !"grdg_sub_discr_dx0_dr0", !"grdg_sub_discr_dx0_dr0_offset"}
!10 = !{i32 128, i32 1, i32 1}
!11 = !{i32 1}
; !12-!15: TBAA chain used as `!tbaa !12` on every load/store in this
; module: access tag -> "double" -> "omnipotent char" -> root.
!12 = !{!13, !13, i64 0}
!13 = !{!"double", !14, i64 0}
!14 = !{!"omnipotent char", !15, i64 0}
!15 = !{!"Simple C/C++ TBAA"}
; !16-!20: alias scopes in domain !18. !16 tags the source-side accesses
; and !19 the destination-side accesses of the function preceding the
; *_workgroup functions; each side lists the other as !noalias.
!16 = !{!17}
!17 = distinct !{!17, !18, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
!18 = distinct !{!18, !"grudge_assign_0"}
!19 = !{!20}
!20 = distinct !{!20, !18, !"grudge_assign_0: %expr_8"}
; !21/!22: !22 is the loop id used as `!llvm.loop !22` by the *_workgroup
; loops; !21 wraps it for `!llvm.mem.parallel_loop_access`.
!21 = !{!22}
!22 = distinct !{!22}
; !23/!24: loop id carrying the "llvm.loop.isvectorized" = 1 marker.
!23 = distinct !{!23, !24}
!24 = !{!"llvm.loop.isvectorized", i32 1}
; !25-!32: alias scopes used by @_pocl_kernel_grudge_assign_0_workgroup
; (!25 on loads, !30 on stores). Each list pairs a scope from domain !27
; with one from domain !29.
!25 = !{!26, !28}
!26 = distinct !{!26, !27, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
!27 = distinct !{!27, !"grudge_assign_0"}
!28 = distinct !{!28, !29, !"_pocl_kernel_grudge_assign_0: argument 1"}
!29 = distinct !{!29, !"_pocl_kernel_grudge_assign_0"}
!30 = !{!31, !32}
!31 = distinct !{!31, !27, !"grudge_assign_0: %expr_8"}
!32 = distinct !{!32, !29, !"_pocl_kernel_grudge_assign_0: argument 0"}
; !33-!40: the same structure for
; @_pocl_kernel_grudge_assign_0_workgroup_fast (!33 on loads, !38 on
; stores), with its own distinct domains !35 and !37.
!33 = !{!34, !36}
!34 = distinct !{!34, !35, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
!35 = distinct !{!35, !"grudge_assign_0"}
!36 = distinct !{!36, !37, !"_pocl_kernel_grudge_assign_0: argument 1"}
!37 = distinct !{!37, !"_pocl_kernel_grudge_assign_0"}
!38 = !{!39, !40}
!39 = distinct !{!39, !35, !"grudge_assign_0: %expr_8"}
!40 = distinct !{!40, !37, !"_pocl_kernel_grudge_assign_0: argument 0"}
; ---------------------------------------------------------------------------
; A new LLVM module starts here: everything above this line belongs to the
; preceding module. Value numbers, attribute groups (#N) and metadata ids
; (!N) restart and must not be resolved across this boundary.
; NOTE(review): presumably this is the optimized output produced from
; in2.ll -- confirm against the commands that generated the listing.
; ---------------------------------------------------------------------------
; ModuleID = 'in2.ll'
source_filename = "parallel_bc"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: alwaysinline norecurse nounwind
define void @_pocl_kernel_grudge_assign_0(i32, double* noalias nocapture, i32, double* noalias nocapture readonly, i32, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone, i64, i64, i64) local_unnamed_addr #0 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 !reqd_work_group_size !10 !pocl_generated !11 {
vector.scevcheck:
%conv.i = trunc i64 %6 to i32
%mul.i = mul nsw i32 %conv.i, -128
%add4.i = add i32 %mul.i, %0
%mul9.i = shl nsw i32 %conv.i, 7
%9 = shl i32 %conv.i, 7
%10 = add i32 %9, %4
%11 = icmp sgt i32 %10, 2147483520
%12 = add i32 %9, %2
%13 = icmp sgt i32 %12, 2147483520
%14 = or i1 %11, %13
br i1 %14, label %pregion_for_entry..i, label %vector.ph
vector.ph: ; preds = %vector.scevcheck
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %add4.i, i32 0
%broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert16 = insertelement <4 x i32> undef, i32 %add4.i, i32 0
%broadcast.splat17 = shufflevector <4 x i32> %broadcast.splatinsert16, <4 x i32> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert18 = insertelement <4 x i32> undef, i32 %add4.i, i32 0
%broadcast.splat19 = shufflevector <4 x i32> %broadcast.splatinsert18, <4 x i32> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert20 = insertelement <4 x i32> undef, i32 %add4.i, i32 0
%broadcast.splat21 = shufflevector <4 x i32> %broadcast.splatinsert20, <4 x i32> undef, <4 x i32> zeroinitializer
%15 = add <4 x i32> %broadcast.splat, <i32 -1, i32 -2, i32 -3, i32 -4>
%16 = add <4 x i32> %broadcast.splat17, <i32 -5, i32 -6, i32 -7, i32 -8>
%17 = add <4 x i32> %broadcast.splat19, <i32 -9, i32 -10, i32 -11, i32 -12>
%18 = add <4 x i32> %broadcast.splat21, <i32 -13, i32 -14, i32 -15, i32 -16>
%19 = icmp sgt <4 x i32> %15, <i32 -1, i32 -1, i32 -1, i32 -1>
%20 = icmp sgt <4 x i32> %16, <i32 -1, i32 -1, i32 -1, i32 -1>
%21 = icmp sgt <4 x i32> %17, <i32 -1, i32 -1, i32 -1, i32 -1>
%22 = icmp sgt <4 x i32> %18, <i32 -1, i32 -1, i32 -1, i32 -1>
%23 = add i32 %mul9.i, %4
%24 = sext i32 %23 to i64
%25 = getelementptr inbounds double, double* %3, i64 %24
%26 = bitcast double* %25 to <4 x i64>*
%wide.load = load <4 x i64>, <4 x i64>* %26, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%27 = getelementptr inbounds double, double* %25, i64 4
%28 = bitcast double* %27 to <4 x i64>*
%wide.load22 = load <4 x i64>, <4 x i64>* %28, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%29 = getelementptr inbounds double, double* %25, i64 8
%30 = bitcast double* %29 to <4 x i64>*
%wide.load23 = load <4 x i64>, <4 x i64>* %30, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%31 = getelementptr inbounds double, double* %25, i64 12
%32 = bitcast double* %31 to <4 x i64>*
%wide.load24 = load <4 x i64>, <4 x i64>* %32, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%33 = add i32 %mul9.i, %2
%34 = sext i32 %33 to i64
%35 = getelementptr inbounds double, double* %1, i64 %34
%36 = bitcast double* %35 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load, <4 x i64>* %36, i32 8, <4 x i1> %19), !tbaa !12, !alias.scope !19, !noalias !16
%37 = getelementptr inbounds double, double* %35, i64 4
%38 = bitcast double* %37 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22, <4 x i64>* %38, i32 8, <4 x i1> %20), !tbaa !12, !alias.scope !19, !noalias !16
%39 = getelementptr inbounds double, double* %35, i64 8
%40 = bitcast double* %39 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23, <4 x i64>* %40, i32 8, <4 x i1> %21), !tbaa !12, !alias.scope !19, !noalias !16
%41 = getelementptr inbounds double, double* %35, i64 12
%42 = bitcast double* %41 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24, <4 x i64>* %42, i32 8, <4 x i1> %22), !tbaa !12, !alias.scope !19, !noalias !16
%43 = add <4 x i32> %broadcast.splat, <i32 -17, i32 -18, i32 -19, i32 -20>
%44 = add <4 x i32> %broadcast.splat17, <i32 -21, i32 -22, i32 -23, i32 -24>
%45 = add <4 x i32> %broadcast.splat19, <i32 -25, i32 -26, i32 -27, i32 -28>
%46 = add <4 x i32> %broadcast.splat21, <i32 -29, i32 -30, i32 -31, i32 -32>
%47 = icmp sgt <4 x i32> %43, <i32 -1, i32 -1, i32 -1, i32 -1>
%48 = icmp sgt <4 x i32> %44, <i32 -1, i32 -1, i32 -1, i32 -1>
%49 = icmp sgt <4 x i32> %45, <i32 -1, i32 -1, i32 -1, i32 -1>
%50 = icmp sgt <4 x i32> %46, <i32 -1, i32 -1, i32 -1, i32 -1>
%51 = or i32 %mul9.i, 16
%52 = add i32 %51, %4
%53 = sext i32 %52 to i64
%54 = getelementptr inbounds double, double* %3, i64 %53
%55 = bitcast double* %54 to <4 x i64>*
%wide.load.1 = load <4 x i64>, <4 x i64>* %55, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%56 = getelementptr inbounds double, double* %54, i64 4
%57 = bitcast double* %56 to <4 x i64>*
%wide.load22.1 = load <4 x i64>, <4 x i64>* %57, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%58 = getelementptr inbounds double, double* %54, i64 8
%59 = bitcast double* %58 to <4 x i64>*
%wide.load23.1 = load <4 x i64>, <4 x i64>* %59, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%60 = getelementptr inbounds double, double* %54, i64 12
%61 = bitcast double* %60 to <4 x i64>*
%wide.load24.1 = load <4 x i64>, <4 x i64>* %61, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%62 = add i32 %51, %2
%63 = sext i32 %62 to i64
%64 = getelementptr inbounds double, double* %1, i64 %63
%65 = bitcast double* %64 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.1, <4 x i64>* %65, i32 8, <4 x i1> %47), !tbaa !12, !alias.scope !19, !noalias !16
%66 = getelementptr inbounds double, double* %64, i64 4
%67 = bitcast double* %66 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.1, <4 x i64>* %67, i32 8, <4 x i1> %48), !tbaa !12, !alias.scope !19, !noalias !16
%68 = getelementptr inbounds double, double* %64, i64 8
%69 = bitcast double* %68 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.1, <4 x i64>* %69, i32 8, <4 x i1> %49), !tbaa !12, !alias.scope !19, !noalias !16
%70 = getelementptr inbounds double, double* %64, i64 12
%71 = bitcast double* %70 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.1, <4 x i64>* %71, i32 8, <4 x i1> %50), !tbaa !12, !alias.scope !19, !noalias !16
%72 = add <4 x i32> %broadcast.splat, <i32 -33, i32 -34, i32 -35, i32 -36>
%73 = add <4 x i32> %broadcast.splat17, <i32 -37, i32 -38, i32 -39, i32 -40>
%74 = add <4 x i32> %broadcast.splat19, <i32 -41, i32 -42, i32 -43, i32 -44>
%75 = add <4 x i32> %broadcast.splat21, <i32 -45, i32 -46, i32 -47, i32 -48>
%76 = icmp sgt <4 x i32> %72, <i32 -1, i32 -1, i32 -1, i32 -1>
%77 = icmp sgt <4 x i32> %73, <i32 -1, i32 -1, i32 -1, i32 -1>
%78 = icmp sgt <4 x i32> %74, <i32 -1, i32 -1, i32 -1, i32 -1>
%79 = icmp sgt <4 x i32> %75, <i32 -1, i32 -1, i32 -1, i32 -1>
%80 = or i32 %mul9.i, 32
%81 = add i32 %80, %4
%82 = sext i32 %81 to i64
%83 = getelementptr inbounds double, double* %3, i64 %82
%84 = bitcast double* %83 to <4 x i64>*
%wide.load.2 = load <4 x i64>, <4 x i64>* %84, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%85 = getelementptr inbounds double, double* %83, i64 4
%86 = bitcast double* %85 to <4 x i64>*
%wide.load22.2 = load <4 x i64>, <4 x i64>* %86, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%87 = getelementptr inbounds double, double* %83, i64 8
%88 = bitcast double* %87 to <4 x i64>*
%wide.load23.2 = load <4 x i64>, <4 x i64>* %88, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%89 = getelementptr inbounds double, double* %83, i64 12
%90 = bitcast double* %89 to <4 x i64>*
%wide.load24.2 = load <4 x i64>, <4 x i64>* %90, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%91 = add i32 %80, %2
%92 = sext i32 %91 to i64
%93 = getelementptr inbounds double, double* %1, i64 %92
%94 = bitcast double* %93 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.2, <4 x i64>* %94, i32 8, <4 x i1> %76), !tbaa !12, !alias.scope !19, !noalias !16
%95 = getelementptr inbounds double, double* %93, i64 4
%96 = bitcast double* %95 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.2, <4 x i64>* %96, i32 8, <4 x i1> %77), !tbaa !12, !alias.scope !19, !noalias !16
%97 = getelementptr inbounds double, double* %93, i64 8
%98 = bitcast double* %97 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.2, <4 x i64>* %98, i32 8, <4 x i1> %78), !tbaa !12, !alias.scope !19, !noalias !16
%99 = getelementptr inbounds double, double* %93, i64 12
%100 = bitcast double* %99 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.2, <4 x i64>* %100, i32 8, <4 x i1> %79), !tbaa !12, !alias.scope !19, !noalias !16
%101 = add <4 x i32> %broadcast.splat, <i32 -49, i32 -50, i32 -51, i32 -52>
%102 = add <4 x i32> %broadcast.splat17, <i32 -53, i32 -54, i32 -55, i32 -56>
%103 = add <4 x i32> %broadcast.splat19, <i32 -57, i32 -58, i32 -59, i32 -60>
%104 = add <4 x i32> %broadcast.splat21, <i32 -61, i32 -62, i32 -63, i32 -64>
%105 = icmp sgt <4 x i32> %101, <i32 -1, i32 -1, i32 -1, i32 -1>
%106 = icmp sgt <4 x i32> %102, <i32 -1, i32 -1, i32 -1, i32 -1>
%107 = icmp sgt <4 x i32> %103, <i32 -1, i32 -1, i32 -1, i32 -1>
%108 = icmp sgt <4 x i32> %104, <i32 -1, i32 -1, i32 -1, i32 -1>
%109 = or i32 %mul9.i, 48
%110 = add i32 %109, %4
%111 = sext i32 %110 to i64
%112 = getelementptr inbounds double, double* %3, i64 %111
%113 = bitcast double* %112 to <4 x i64>*
%wide.load.3 = load <4 x i64>, <4 x i64>* %113, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%114 = getelementptr inbounds double, double* %112, i64 4
%115 = bitcast double* %114 to <4 x i64>*
%wide.load22.3 = load <4 x i64>, <4 x i64>* %115, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%116 = getelementptr inbounds double, double* %112, i64 8
%117 = bitcast double* %116 to <4 x i64>*
%wide.load23.3 = load <4 x i64>, <4 x i64>* %117, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%118 = getelementptr inbounds double, double* %112, i64 12
%119 = bitcast double* %118 to <4 x i64>*
%wide.load24.3 = load <4 x i64>, <4 x i64>* %119, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%120 = add i32 %109, %2
%121 = sext i32 %120 to i64
%122 = getelementptr inbounds double, double* %1, i64 %121
%123 = bitcast double* %122 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.3, <4 x i64>* %123, i32 8, <4 x i1> %105), !tbaa !12, !alias.scope !19, !noalias !16
%124 = getelementptr inbounds double, double* %122, i64 4
%125 = bitcast double* %124 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.3, <4 x i64>* %125, i32 8, <4 x i1> %106), !tbaa !12, !alias.scope !19, !noalias !16
%126 = getelementptr inbounds double, double* %122, i64 8
%127 = bitcast double* %126 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.3, <4 x i64>* %127, i32 8, <4 x i1> %107), !tbaa !12, !alias.scope !19, !noalias !16
%128 = getelementptr inbounds double, double* %122, i64 12
%129 = bitcast double* %128 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.3, <4 x i64>* %129, i32 8, <4 x i1> %108), !tbaa !12, !alias.scope !19, !noalias !16
%130 = add <4 x i32> %broadcast.splat, <i32 -65, i32 -66, i32 -67, i32 -68>
%131 = add <4 x i32> %broadcast.splat17, <i32 -69, i32 -70, i32 -71, i32 -72>
%132 = add <4 x i32> %broadcast.splat19, <i32 -73, i32 -74, i32 -75, i32 -76>
%133 = add <4 x i32> %broadcast.splat21, <i32 -77, i32 -78, i32 -79, i32 -80>
%134 = icmp sgt <4 x i32> %130, <i32 -1, i32 -1, i32 -1, i32 -1>
%135 = icmp sgt <4 x i32> %131, <i32 -1, i32 -1, i32 -1, i32 -1>
%136 = icmp sgt <4 x i32> %132, <i32 -1, i32 -1, i32 -1, i32 -1>
%137 = icmp sgt <4 x i32> %133, <i32 -1, i32 -1, i32 -1, i32 -1>
%138 = or i32 %mul9.i, 64
%139 = add i32 %138, %4
%140 = sext i32 %139 to i64
%141 = getelementptr inbounds double, double* %3, i64 %140
%142 = bitcast double* %141 to <4 x i64>*
%wide.load.4 = load <4 x i64>, <4 x i64>* %142, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%143 = getelementptr inbounds double, double* %141, i64 4
%144 = bitcast double* %143 to <4 x i64>*
%wide.load22.4 = load <4 x i64>, <4 x i64>* %144, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%145 = getelementptr inbounds double, double* %141, i64 8
%146 = bitcast double* %145 to <4 x i64>*
%wide.load23.4 = load <4 x i64>, <4 x i64>* %146, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%147 = getelementptr inbounds double, double* %141, i64 12
%148 = bitcast double* %147 to <4 x i64>*
%wide.load24.4 = load <4 x i64>, <4 x i64>* %148, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%149 = add i32 %138, %2
%150 = sext i32 %149 to i64
%151 = getelementptr inbounds double, double* %1, i64 %150
%152 = bitcast double* %151 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.4, <4 x i64>* %152, i32 8, <4 x i1> %134), !tbaa !12, !alias.scope !19, !noalias !16
%153 = getelementptr inbounds double, double* %151, i64 4
%154 = bitcast double* %153 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.4, <4 x i64>* %154, i32 8, <4 x i1> %135), !tbaa !12, !alias.scope !19, !noalias !16
%155 = getelementptr inbounds double, double* %151, i64 8
%156 = bitcast double* %155 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.4, <4 x i64>* %156, i32 8, <4 x i1> %136), !tbaa !12, !alias.scope !19, !noalias !16
%157 = getelementptr inbounds double, double* %151, i64 12
%158 = bitcast double* %157 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.4, <4 x i64>* %158, i32 8, <4 x i1> %137), !tbaa !12, !alias.scope !19, !noalias !16
%159 = add <4 x i32> %broadcast.splat, <i32 -81, i32 -82, i32 -83, i32 -84>
%160 = add <4 x i32> %broadcast.splat17, <i32 -85, i32 -86, i32 -87, i32 -88>
%161 = add <4 x i32> %broadcast.splat19, <i32 -89, i32 -90, i32 -91, i32 -92>
%162 = add <4 x i32> %broadcast.splat21, <i32 -93, i32 -94, i32 -95, i32 -96>
%163 = icmp sgt <4 x i32> %159, <i32 -1, i32 -1, i32 -1, i32 -1>
%164 = icmp sgt <4 x i32> %160, <i32 -1, i32 -1, i32 -1, i32 -1>
%165 = icmp sgt <4 x i32> %161, <i32 -1, i32 -1, i32 -1, i32 -1>
%166 = icmp sgt <4 x i32> %162, <i32 -1, i32 -1, i32 -1, i32 -1>
%167 = or i32 %mul9.i, 80
%168 = add i32 %167, %4
%169 = sext i32 %168 to i64
%170 = getelementptr inbounds double, double* %3, i64 %169
%171 = bitcast double* %170 to <4 x i64>*
%wide.load.5 = load <4 x i64>, <4 x i64>* %171, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%172 = getelementptr inbounds double, double* %170, i64 4
%173 = bitcast double* %172 to <4 x i64>*
%wide.load22.5 = load <4 x i64>, <4 x i64>* %173, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%174 = getelementptr inbounds double, double* %170, i64 8
%175 = bitcast double* %174 to <4 x i64>*
%wide.load23.5 = load <4 x i64>, <4 x i64>* %175, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%176 = getelementptr inbounds double, double* %170, i64 12
%177 = bitcast double* %176 to <4 x i64>*
%wide.load24.5 = load <4 x i64>, <4 x i64>* %177, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%178 = add i32 %167, %2
%179 = sext i32 %178 to i64
%180 = getelementptr inbounds double, double* %1, i64 %179
%181 = bitcast double* %180 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.5, <4 x i64>* %181, i32 8, <4 x i1> %163), !tbaa !12, !alias.scope !19, !noalias !16
%182 = getelementptr inbounds double, double* %180, i64 4
%183 = bitcast double* %182 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.5, <4 x i64>* %183, i32 8, <4 x i1> %164), !tbaa !12, !alias.scope !19, !noalias !16
%184 = getelementptr inbounds double, double* %180, i64 8
%185 = bitcast double* %184 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.5, <4 x i64>* %185, i32 8, <4 x i1> %165), !tbaa !12, !alias.scope !19, !noalias !16
%186 = getelementptr inbounds double, double* %180, i64 12
%187 = bitcast double* %186 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.5, <4 x i64>* %187, i32 8, <4 x i1> %166), !tbaa !12, !alias.scope !19, !noalias !16
%188 = add <4 x i32> %broadcast.splat, <i32 -97, i32 -98, i32 -99, i32 -100>
%189 = add <4 x i32> %broadcast.splat17, <i32 -101, i32 -102, i32 -103, i32 -104>
%190 = add <4 x i32> %broadcast.splat19, <i32 -105, i32 -106, i32 -107, i32 -108>
%191 = add <4 x i32> %broadcast.splat21, <i32 -109, i32 -110, i32 -111, i32 -112>
%192 = icmp sgt <4 x i32> %188, <i32 -1, i32 -1, i32 -1, i32 -1>
%193 = icmp sgt <4 x i32> %189, <i32 -1, i32 -1, i32 -1, i32 -1>
%194 = icmp sgt <4 x i32> %190, <i32 -1, i32 -1, i32 -1, i32 -1>
%195 = icmp sgt <4 x i32> %191, <i32 -1, i32 -1, i32 -1, i32 -1>
%196 = or i32 %mul9.i, 96
%197 = add i32 %196, %4
%198 = sext i32 %197 to i64
%199 = getelementptr inbounds double, double* %3, i64 %198
%200 = bitcast double* %199 to <4 x i64>*
%wide.load.6 = load <4 x i64>, <4 x i64>* %200, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%201 = getelementptr inbounds double, double* %199, i64 4
%202 = bitcast double* %201 to <4 x i64>*
%wide.load22.6 = load <4 x i64>, <4 x i64>* %202, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%203 = getelementptr inbounds double, double* %199, i64 8
%204 = bitcast double* %203 to <4 x i64>*
%wide.load23.6 = load <4 x i64>, <4 x i64>* %204, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%205 = getelementptr inbounds double, double* %199, i64 12
%206 = bitcast double* %205 to <4 x i64>*
%wide.load24.6 = load <4 x i64>, <4 x i64>* %206, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%207 = add i32 %196, %2
%208 = sext i32 %207 to i64
%209 = getelementptr inbounds double, double* %1, i64 %208
%210 = bitcast double* %209 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.6, <4 x i64>* %210, i32 8, <4 x i1> %192), !tbaa !12, !alias.scope !19, !noalias !16
%211 = getelementptr inbounds double, double* %209, i64 4
%212 = bitcast double* %211 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.6, <4 x i64>* %212, i32 8, <4 x i1> %193), !tbaa !12, !alias.scope !19, !noalias !16
%213 = getelementptr inbounds double, double* %209, i64 8
%214 = bitcast double* %213 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.6, <4 x i64>* %214, i32 8, <4 x i1> %194), !tbaa !12, !alias.scope !19, !noalias !16
%215 = getelementptr inbounds double, double* %209, i64 12
%216 = bitcast double* %215 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.6, <4 x i64>* %216, i32 8, <4 x i1> %195), !tbaa !12, !alias.scope !19, !noalias !16
%217 = add <4 x i32> %broadcast.splat, <i32 -113, i32 -114, i32 -115, i32 -116>
%218 = add <4 x i32> %broadcast.splat17, <i32 -117, i32 -118, i32 -119, i32 -120>
%219 = add <4 x i32> %broadcast.splat19, <i32 -121, i32 -122, i32 -123, i32 -124>
%220 = add <4 x i32> %broadcast.splat21, <i32 -125, i32 -126, i32 -127, i32 -128>
%221 = icmp sgt <4 x i32> %217, <i32 -1, i32 -1, i32 -1, i32 -1>
%222 = icmp sgt <4 x i32> %218, <i32 -1, i32 -1, i32 -1, i32 -1>
%223 = icmp sgt <4 x i32> %219, <i32 -1, i32 -1, i32 -1, i32 -1>
%224 = icmp sgt <4 x i32> %220, <i32 -1, i32 -1, i32 -1, i32 -1>
%225 = or i32 %mul9.i, 112
%226 = add i32 %225, %4
%227 = sext i32 %226 to i64
%228 = getelementptr inbounds double, double* %3, i64 %227
%229 = bitcast double* %228 to <4 x i64>*
%wide.load.7 = load <4 x i64>, <4 x i64>* %229, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%230 = getelementptr inbounds double, double* %228, i64 4
%231 = bitcast double* %230 to <4 x i64>*
%wide.load22.7 = load <4 x i64>, <4 x i64>* %231, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%232 = getelementptr inbounds double, double* %228, i64 8
%233 = bitcast double* %232 to <4 x i64>*
%wide.load23.7 = load <4 x i64>, <4 x i64>* %233, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%234 = getelementptr inbounds double, double* %228, i64 12
%235 = bitcast double* %234 to <4 x i64>*
%wide.load24.7 = load <4 x i64>, <4 x i64>* %235, align 8, !tbaa !12, !alias.scope !16, !noalias !19
%236 = add i32 %225, %2
%237 = sext i32 %236 to i64
%238 = getelementptr inbounds double, double* %1, i64 %237
%239 = bitcast double* %238 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load.7, <4 x i64>* %239, i32 8, <4 x i1> %221), !tbaa !12, !alias.scope !19, !noalias !16
%240 = getelementptr inbounds double, double* %238, i64 4
%241 = bitcast double* %240 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load22.7, <4 x i64>* %241, i32 8, <4 x i1> %222), !tbaa !12, !alias.scope !19, !noalias !16
%242 = getelementptr inbounds double, double* %238, i64 8
%243 = bitcast double* %242 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load23.7, <4 x i64>* %243, i32 8, <4 x i1> %223), !tbaa !12, !alias.scope !19, !noalias !16
%244 = getelementptr inbounds double, double* %238, i64 12
%245 = bitcast double* %244 to <4 x i64>*
call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %wide.load24.7, <4 x i64>* %245, i32 8, <4 x i1> %224), !tbaa !12, !alias.scope !19, !noalias !16
br label %grudge_assign_0.exit
pregion_for_entry..i: ; preds = %vector.scevcheck, %.r_exit.i.1
%_local_id_x.0 = phi i64 [ %255, %.r_exit.i.1 ], [ 0, %vector.scevcheck ]
%conv2.i = trunc i64 %_local_id_x.0 to i32
%add.i = xor i32 %conv2.i, -1
%add5.i = add i32 %add4.i, %add.i
%cmp.i = icmp sgt i32 %add5.i, -1
br i1 %cmp.i, label %246, label %.r_exit.i
; <label>:246: ; preds = %pregion_for_entry..i
%add10.i = add nuw nsw i32 %mul9.i, %conv2.i
%add13.i = add i32 %add10.i, %4
%idxprom.i = sext i32 %add13.i to i64
%arrayidx.i = getelementptr inbounds double, double* %3, i64 %idxprom.i
%247 = bitcast double* %arrayidx.i to i64*
%248 = load i64, i64* %247, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21
%add20.i = add i32 %add10.i, %2
%idxprom21.i = sext i32 %add20.i to i64
%arrayidx22.i = getelementptr inbounds double, double* %1, i64 %idxprom21.i
%249 = bitcast double* %arrayidx22.i to i64*
store i64 %248, i64* %249, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i
.r_exit.i: ; preds = %246, %pregion_for_entry..i
%250 = trunc i64 %_local_id_x.0 to i32
%add.i.1 = xor i32 %250, -2
%add5.i.1 = add i32 %add4.i, %add.i.1
%cmp.i.1 = icmp sgt i32 %add5.i.1, -1
br i1 %cmp.i.1, label %251, label %.r_exit.i.1
grudge_assign_0.exit: ; preds = %.r_exit.i.1, %vector.ph
ret void
; <label>:251: ; preds = %.r_exit.i
%conv2.i.1 = or i32 %250, 1
%add10.i.1 = add nuw nsw i32 %mul9.i, %conv2.i.1
%add13.i.1 = add i32 %add10.i.1, %4
%idxprom.i.1 = sext i32 %add13.i.1 to i64
%arrayidx.i.1 = getelementptr inbounds double, double* %3, i64 %idxprom.i.1
%252 = bitcast double* %arrayidx.i.1 to i64*
%253 = load i64, i64* %252, align 8, !tbaa !12, !alias.scope !16, !noalias !19, !llvm.mem.parallel_loop_access !21
%add20.i.1 = add i32 %add10.i.1, %2
%idxprom21.i.1 = sext i32 %add20.i.1 to i64
%arrayidx22.i.1 = getelementptr inbounds double, double* %1, i64 %idxprom21.i.1
%254 = bitcast double* %arrayidx22.i.1 to i64*
store i64 %253, i64* %254, align 8, !tbaa !12, !alias.scope !19, !noalias !16, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i.1
.r_exit.i.1: ; preds = %251, %.r_exit.i
%255 = add nuw nsw i64 %_local_id_x.0, 2
%exitcond.1 = icmp eq i64 %255, 128
br i1 %exitcond.1, label %grudge_assign_0.exit, label %pregion_for_entry..i, !llvm.loop !23
}
; @_pocl_kernel_grudge_assign_0_workgroup
;
; Unpacks five kernel arguments from the pointer array %0 and then runs 128
; logical iterations (a loop unrolled by two, 64 trips) that each
; conditionally copy one 64-bit element from one double buffer to another.
; The alias-scope metadata on the load names its buffer
; %grdg_sub_discr_dx0_dr0; the metadata on the store names its buffer %expr_8.
;
; Parameters (unnamed, numbered by position):
;   %0      i8**  array of argument slots (read-only); slots 0..4 are read
;   %1      struct pointer, marked readnone and never referenced in the body
;   %2      i64   truncated to i32 and scaled by 128 below; presumably the
;                 work-group index along x -- TODO confirm against the caller
;   %3, %4  i64   not referenced in the body
;
; In this variant slots 1 and 3 hold a pointer to the double*, so each buffer
; address takes two loads (compare the _fast variant, which takes one).
; Function Attrs: norecurse nounwind
define void @_pocl_kernel_grudge_assign_0_workgroup(i8** nocapture readonly, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone, i64, i64, i64) local_unnamed_addr #1 {
; Slot 0: pointer to an i32; its value (%8) is the bound used by the guard.
%6 = bitcast i8** %0 to i32**
%7 = load i32*, i32** %6, align 8
%8 = load i32, i32* %7, align 4
; Slot 1: pointer to a double* (two loads); %12 is the buffer stored to.
%9 = getelementptr i8*, i8** %0, i64 1
%10 = bitcast i8** %9 to double***
%11 = load double**, double*** %10, align 8
%12 = load double*, double** %11, align 8
; Slot 2: pointer to an i32; %16 is added to the store index.
%13 = getelementptr i8*, i8** %0, i64 2
%14 = bitcast i8** %13 to i32**
%15 = load i32*, i32** %14, align 8
%16 = load i32, i32* %15, align 4
; Slot 3: pointer to a double* (two loads); %20 is the buffer loaded from.
%17 = getelementptr i8*, i8** %0, i64 3
%18 = bitcast i8** %17 to double***
%19 = load double**, double*** %18, align 8
%20 = load double*, double** %19, align 8
; Slot 4: pointer to an i32; %24 is added to the load index.
%21 = getelementptr i8*, i8** %0, i64 4
%22 = bitcast i8** %21 to i32**
%23 = load i32*, i32** %22, align 8
%24 = load i32, i32* %23, align 4
; Loop invariants: %add4.i.i = %8 - 128 * trunc(%2) and
; %mul9.i.i = 128 * trunc(%2) (base element index for this invocation).
%conv.i.i = trunc i64 %2 to i32
%mul.i.i = mul nsw i32 %conv.i.i, -128
%add4.i.i = add i32 %8, %mul.i.i
%mul9.i.i = shl nsw i32 %conv.i.i, 7
br label %pregion_for_entry..i.i
; Loop header and even half of the unrolled pair.
; %_local_id_x.i.0 (called "id" below) takes the values 0, 2, 4, ..., 126.
pregion_for_entry..i.i: ; preds = %.r_exit.i.i.1, %5
%_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %34, %.r_exit.i.i.1 ]
%conv2.i.i = trunc i64 %_local_id_x.i.0 to i32
; xor with -1 is bitwise NOT, i.e. -id - 1, so
; %add5.i.i = %8 - 128 * trunc(%2) - id - 1.
%add.i.i = xor i32 %conv2.i.i, -1
%add5.i.i = add i32 %add4.i.i, %add.i.i
; Signed "> -1" means ">= 0": copy only when that value is non-negative.
%cmp.i.i = icmp sgt i32 %add5.i.i, -1
br i1 %cmp.i.i, label %25, label %.r_exit.i.i
; Guarded copy for element id: index = 128 * trunc(%2) + id, plus %24 on the
; load side and %16 on the store side. The double is moved as raw i64 bits.
; <label>:25: ; preds = %pregion_for_entry..i.i
%add10.i.i = add nuw nsw i32 %mul9.i.i, %conv2.i.i
%add13.i.i = add i32 %add10.i.i, %24
%idxprom.i.i = sext i32 %add13.i.i to i64
%arrayidx.i.i = getelementptr inbounds double, double* %20, i64 %idxprom.i.i
%26 = bitcast double* %arrayidx.i.i to i64*
%27 = load i64, i64* %26, align 8, !tbaa !12, !alias.scope !25, !noalias !30, !llvm.mem.parallel_loop_access !21
%add20.i.i = add i32 %add10.i.i, %16
%idxprom21.i.i = sext i32 %add20.i.i to i64
%arrayidx22.i.i = getelementptr inbounds double, double* %12, i64 %idxprom21.i.i
%28 = bitcast double* %arrayidx22.i.i to i64*
store i64 %27, i64* %28, align 8, !tbaa !12, !alias.scope !30, !noalias !25, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i.i
; Odd half of the unrolled pair: the same guard evaluated for id + 1.
; Because id is always even here, id ^ -2 equals ~(id | 1) = -(id + 1) - 1.
.r_exit.i.i: ; preds = %25, %pregion_for_entry..i.i
%29 = trunc i64 %_local_id_x.i.0 to i32
%add.i.i.1 = xor i32 %29, -2
%add5.i.i.1 = add i32 %add4.i.i, %add.i.i.1
%cmp.i.i.1 = icmp sgt i32 %add5.i.i.1, -1
br i1 %cmp.i.i.1, label %30, label %.r_exit.i.i.1
; Function exit; reached from the loop latch once the counter hits 128.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i.1
ret void
; Guarded copy for element id + 1 (id | 1 equals id + 1 because id is even).
; <label>:30: ; preds = %.r_exit.i.i
%conv2.i.i.1 = or i32 %29, 1
%add10.i.i.1 = add nuw nsw i32 %mul9.i.i, %conv2.i.i.1
%add13.i.i.1 = add i32 %add10.i.i.1, %24
%idxprom.i.i.1 = sext i32 %add13.i.i.1 to i64
%arrayidx.i.i.1 = getelementptr inbounds double, double* %20, i64 %idxprom.i.i.1
%31 = bitcast double* %arrayidx.i.i.1 to i64*
%32 = load i64, i64* %31, align 8, !tbaa !12, !alias.scope !25, !noalias !30, !llvm.mem.parallel_loop_access !21
%add20.i.i.1 = add i32 %add10.i.i.1, %16
%idxprom21.i.i.1 = sext i32 %add20.i.i.1 to i64
%arrayidx22.i.i.1 = getelementptr inbounds double, double* %12, i64 %idxprom21.i.i.1
%33 = bitcast double* %arrayidx22.i.i.1 to i64*
store i64 %32, i64* %33, align 8, !tbaa !12, !alias.scope !30, !noalias !25, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i.i.1
; Loop latch: advance by two and leave after id reaches 128.
.r_exit.i.i.1: ; preds = %30, %.r_exit.i.i
%34 = add nuw nsw i64 %_local_id_x.i.0, 2
%exitcond.1 = icmp eq i64 %34, 128
br i1 %exitcond.1, label %_pocl_kernel_grudge_assign_0.exit, label %pregion_for_entry..i.i, !llvm.loop !22
}
; @_pocl_kernel_grudge_assign_0_workgroup_fast
;
; Same computation as the non-"fast" work-group function: unpack five kernel
; arguments from the pointer array %0, then run 128 logical iterations (a loop
; unrolled by two) that each conditionally copy one 64-bit element between two
; double buffers. The only difference in argument handling is that slots 1
; and 3 of %0 hold the double* directly, so each buffer address takes a single
; load instead of two.
;
; Parameters (unnamed, numbered by position):
;   %0      i8**  array of argument slots (read-only); slots 0..4 are read
;   %1      struct pointer, marked readnone and never referenced in the body
;   %2      i64   truncated to i32 and scaled by 128 below; presumably the
;                 work-group index along x -- TODO confirm against the caller
;   %3, %4  i64   not referenced in the body
; Function Attrs: norecurse nounwind
define void @_pocl_kernel_grudge_assign_0_workgroup_fast(i8** nocapture readonly, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readnone, i64, i64, i64) local_unnamed_addr #1 {
; Slot 0: pointer to an i32; its value (%8) is the bound used by the guard.
%6 = bitcast i8** %0 to i32**
%7 = load i32*, i32** %6, align 8
%8 = load i32, i32* %7, align 4
; Slot 1: the double* itself (one load); %11 is the buffer stored to.
%9 = getelementptr i8*, i8** %0, i64 1
%10 = bitcast i8** %9 to double**
%11 = load double*, double** %10, align 8
; Slot 2: pointer to an i32; %15 is added to the store index.
%12 = getelementptr i8*, i8** %0, i64 2
%13 = bitcast i8** %12 to i32**
%14 = load i32*, i32** %13, align 8
%15 = load i32, i32* %14, align 4
; Slot 3: the double* itself (one load); %18 is the buffer loaded from.
%16 = getelementptr i8*, i8** %0, i64 3
%17 = bitcast i8** %16 to double**
%18 = load double*, double** %17, align 8
; Slot 4: pointer to an i32; %22 is added to the load index.
%19 = getelementptr i8*, i8** %0, i64 4
%20 = bitcast i8** %19 to i32**
%21 = load i32*, i32** %20, align 8
%22 = load i32, i32* %21, align 4
; Loop invariants: %add4.i.i = %8 - 128 * trunc(%2) and
; %mul9.i.i = 128 * trunc(%2) (base element index for this invocation).
%conv.i.i = trunc i64 %2 to i32
%mul.i.i = mul nsw i32 %conv.i.i, -128
%add4.i.i = add i32 %8, %mul.i.i
%mul9.i.i = shl nsw i32 %conv.i.i, 7
br label %pregion_for_entry..i.i
; Loop header and even half of the unrolled pair.
; %_local_id_x.i.0 (called "id" below) takes the values 0, 2, 4, ..., 126.
pregion_for_entry..i.i: ; preds = %.r_exit.i.i.1, %5
%_local_id_x.i.0 = phi i64 [ 0, %5 ], [ %32, %.r_exit.i.i.1 ]
%conv2.i.i = trunc i64 %_local_id_x.i.0 to i32
; xor with -1 is bitwise NOT, i.e. -id - 1, so
; %add5.i.i = %8 - 128 * trunc(%2) - id - 1.
%add.i.i = xor i32 %conv2.i.i, -1
%add5.i.i = add i32 %add4.i.i, %add.i.i
; Signed "> -1" means ">= 0": copy only when that value is non-negative.
%cmp.i.i = icmp sgt i32 %add5.i.i, -1
br i1 %cmp.i.i, label %23, label %.r_exit.i.i
; Guarded copy for element id: index = 128 * trunc(%2) + id, plus %22 on the
; load side and %15 on the store side. The double is moved as raw i64 bits.
; <label>:23: ; preds = %pregion_for_entry..i.i
%add10.i.i = add nuw nsw i32 %mul9.i.i, %conv2.i.i
%add13.i.i = add i32 %add10.i.i, %22
%idxprom.i.i = sext i32 %add13.i.i to i64
%arrayidx.i.i = getelementptr inbounds double, double* %18, i64 %idxprom.i.i
%24 = bitcast double* %arrayidx.i.i to i64*
%25 = load i64, i64* %24, align 8, !tbaa !12, !alias.scope !33, !noalias !38, !llvm.mem.parallel_loop_access !21
%add20.i.i = add i32 %add10.i.i, %15
%idxprom21.i.i = sext i32 %add20.i.i to i64
%arrayidx22.i.i = getelementptr inbounds double, double* %11, i64 %idxprom21.i.i
%26 = bitcast double* %arrayidx22.i.i to i64*
store i64 %25, i64* %26, align 8, !tbaa !12, !alias.scope !38, !noalias !33, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i.i
; Odd half of the unrolled pair: the same guard evaluated for id + 1.
; Because id is always even here, id ^ -2 equals ~(id | 1) = -(id + 1) - 1.
.r_exit.i.i: ; preds = %23, %pregion_for_entry..i.i
%27 = trunc i64 %_local_id_x.i.0 to i32
%add.i.i.1 = xor i32 %27, -2
%add5.i.i.1 = add i32 %add4.i.i, %add.i.i.1
%cmp.i.i.1 = icmp sgt i32 %add5.i.i.1, -1
br i1 %cmp.i.i.1, label %28, label %.r_exit.i.i.1
; Function exit; reached from the loop latch once the counter hits 128.
_pocl_kernel_grudge_assign_0.exit: ; preds = %.r_exit.i.i.1
ret void
; Guarded copy for element id + 1 (id | 1 equals id + 1 because id is even).
; <label>:28: ; preds = %.r_exit.i.i
%conv2.i.i.1 = or i32 %27, 1
%add10.i.i.1 = add nuw nsw i32 %mul9.i.i, %conv2.i.i.1
%add13.i.i.1 = add i32 %add10.i.i.1, %22
%idxprom.i.i.1 = sext i32 %add13.i.i.1 to i64
%arrayidx.i.i.1 = getelementptr inbounds double, double* %18, i64 %idxprom.i.i.1
%29 = bitcast double* %arrayidx.i.i.1 to i64*
%30 = load i64, i64* %29, align 8, !tbaa !12, !alias.scope !33, !noalias !38, !llvm.mem.parallel_loop_access !21
%add20.i.i.1 = add i32 %add10.i.i.1, %15
%idxprom21.i.i.1 = sext i32 %add20.i.i.1 to i64
%arrayidx22.i.i.1 = getelementptr inbounds double, double* %11, i64 %idxprom21.i.i.1
%31 = bitcast double* %arrayidx22.i.i.1 to i64*
store i64 %30, i64* %31, align 8, !tbaa !12, !alias.scope !38, !noalias !33, !llvm.mem.parallel_loop_access !21
br label %.r_exit.i.i.1
; Loop latch: advance by two and leave after id reaches 128.
.r_exit.i.i.1: ; preds = %28, %.r_exit.i.i
%32 = add nuw nsw i64 %_local_id_x.i.0, 2
%exitcond.1 = icmp eq i64 %32, 128
br i1 %exitcond.1, label %_pocl_kernel_grudge_assign_0.exit, label %pregion_for_entry..i.i, !llvm.loop !22
}
; Masked vector store intrinsic (4 x i64 value, pointer, alignment, 4 x i1
; lane mask); called by the vectorized code earlier in this module.
; Function Attrs: argmemonly nounwind
declare void @llvm.masked.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, i32, <4 x i1>) #2
; Attribute groups. #1 is attached to the two _workgroup functions and #2 to
; the intrinsic declaration above. #0 carries alwaysinline plus the haswell
; target settings; its user is not among the _workgroup functions --
; presumably the kernel function itself (TODO confirm).
attributes #0 = { alwaysinline norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "stackrealign" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind }
attributes #2 = { argmemonly nounwind }
; Named module-level metadata: module flags, OpenCL / SPIR version (!3 is
; shared by both version entries), and the producing compiler's ident string.
!llvm.module.flags = !{!0, !1, !2}
!opencl.ocl.version = !{!3}
!llvm.ident = !{!4}
!opencl.spir.version = !{!3}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 1, i32 2}
!4 = !{!"clang version 6.0.1 (tags/RELEASE_601/final)"}
; !5..!11 are not referenced by the _workgroup functions. They look like
; per-argument kernel metadata (five entries each: numeric codes, access
; qualifiers, types, type qualifiers, names) plus a {128, 1, 1} triple and a
; single-element node -- NOTE(review): confirm against the kernel definition.
!5 = !{i32 0, i32 1, i32 0, i32 1, i32 0}
!6 = !{!"none", !"none", !"none", !"none", !"none"}
!7 = !{!"int", !"double*", !"int", !"double*", !"int"}
!8 = !{!"", !"restrict", !"", !"restrict const", !""}
!9 = !{!"grdg_n", !"expr_8", !"expr_8_offset", !"grdg_sub_discr_dx0_dr0", !"grdg_sub_discr_dx0_dr0_offset"}
!10 = !{i32 128, i32 1, i32 1}
!11 = !{i32 1}
; TBAA: !12 is the access tag for "double", which descends from
; "omnipotent char" under the root "Simple C/C++ TBAA".
!12 = !{!13, !13, i64 0}
!13 = !{!"double", !14, i64 0}
!14 = !{!"omnipotent char", !15, i64 0}
!15 = !{!"Simple C/C++ TBAA"}
; Alias scopes in domain !18 ("grudge_assign_0"): list !16 holds the scope
; for %grdg_sub_discr_dx0_dr0 and list !19 holds the scope for %expr_8.
!16 = !{!17}
!17 = distinct !{!17, !18, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
!18 = distinct !{!18, !"grudge_assign_0"}
!19 = !{!20}
!20 = distinct !{!20, !18, !"grudge_assign_0: %expr_8"}
; Loop identifiers: !22 is the !llvm.loop id on the _workgroup loops and !21
; wraps it for !llvm.mem.parallel_loop_access. !23 is a separate loop id that
; carries the llvm.loop.isvectorized = 1 property (!24).
!21 = !{!22}
!22 = distinct !{!22}
!23 = distinct !{!23, !24}
!24 = !{!"llvm.loop.isvectorized", i32 1}
; Alias scopes used by @_pocl_kernel_grudge_assign_0_workgroup: !25 tags its
; loads and !30 tags its stores. Each list pairs a "grudge_assign_0" scope
; with a "_pocl_kernel_grudge_assign_0: argument N" scope.
!25 = !{!26, !28}
!26 = distinct !{!26, !27, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
!27 = distinct !{!27, !"grudge_assign_0"}
!28 = distinct !{!28, !29, !"_pocl_kernel_grudge_assign_0: argument 1"}
!29 = distinct !{!29, !"_pocl_kernel_grudge_assign_0"}
!30 = !{!31, !32}
!31 = distinct !{!31, !27, !"grudge_assign_0: %expr_8"}
!32 = distinct !{!32, !29, !"_pocl_kernel_grudge_assign_0: argument 0"}
; The same structure for @_pocl_kernel_grudge_assign_0_workgroup_fast:
; !33 tags its loads and !38 tags its stores.
!33 = !{!34, !36}
!34 = distinct !{!34, !35, !"grudge_assign_0: %grdg_sub_discr_dx0_dr0"}
!35 = distinct !{!35, !"grudge_assign_0"}
!36 = distinct !{!36, !37, !"_pocl_kernel_grudge_assign_0: argument 1"}
!37 = distinct !{!37, !"_pocl_kernel_grudge_assign_0"}
!38 = !{!39, !40}
!39 = distinct !{!39, !35, !"grudge_assign_0: %expr_8"}
!40 = distinct !{!40, !37, !"_pocl_kernel_grudge_assign_0: argument 0"}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment