masahi/myadd_kernel0.ll

## myadd_kernel0.ll
[23:36:46] src/codegen/llvm/codegen_amdgpu.cc:177: ; ModuleID = 'myadd__kernel0'
; Source file name recorded for this module.
source_filename = "myadd__kernel0"
; Data layout: little-endian ("e"). Default-address-space pointers are 32-bit;
; address spaces 1, 2 and 4 use 64-bit pointers, 3 and 5 use 32-bit pointers.
; Native integer widths are 32 and 64 bits ("n32:64").
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
; Target triple: amdgcn architecture, amd vendor, amdhsa OS, hcc environment.
target triple = "amdgcn-amd-amdhsa-hcc"

; Function Attrs: nounwind
;
; @myadd__kernel0 -- guarded element-wise float addition, one element per invocation.
;   %0 : destination float buffer in addrspace(1)
;   %1 : readonly source float buffer in addrspace(1)
;   %2 : readonly source float buffer in addrspace(1)
;   %3 : i32 bound used by the range guard
; With base = workitem.id.x << 7 and idx = base + workgroup.id.x, the kernel
; stores %2[idx] + %1[idx] into %0[idx] when workgroup.id.x < %3 - base;
; otherwise it returns without touching memory.
; NOTE(review): the work-item id supplies the stride-128 term and the
; work-group id the unit-stride term -- confirm this is the intended binding.
define amdgpu_kernel void @myadd__kernel0(float addrspace(1)* noalias nocapture, float addrspace(1)* noalias nocapture readonly, float addrspace(1)* noalias nocapture readonly, i32) local_unnamed_addr #0 {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %bid = tail call i32 @llvm.amdgcn.workgroup.id.x()
  %base = shl nsw i32 %tid, 7                       ; base = tid * 128
  %room = sub nsw i32 %3, %base                     ; elements left past base
  %in_range = icmp slt i32 %bid, %room
  br i1 %in_range, label %do_add, label %done, !prof !0

do_add:                                           ; preds = %entry
  %idx = add nsw i32 %base, %bid
  %idx64 = sext i32 %idx to i64
  ; Address computations for the three buffers share the same element index.
  %lhs.ptr = getelementptr inbounds float, float addrspace(1)* %2, i64 %idx64
  %rhs.ptr = getelementptr inbounds float, float addrspace(1)* %1, i64 %idx64
  %out.ptr = getelementptr inbounds float, float addrspace(1)* %0, i64 %idx64
  %lhs = load float, float addrspace(1)* %lhs.ptr, align 4, !tbaa !1
  %rhs = load float, float addrspace(1)* %rhs.ptr, align 4, !tbaa !5
  %sum = fadd float %lhs, %rhs
  store float %sum, float addrspace(1)* %out.ptr, align 4, !tbaa !8
  br label %done

done:                                             ; preds = %do_add, %entry
  ret void
}

; Function Attrs: nounwind readnone speculatable
; AMDGPU intrinsic yielding the i32 work-item id in the x dimension.
; Called in the entry block of both kernels in this module.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Function Attrs: nounwind readnone speculatable
; AMDGPU intrinsic yielding the i32 work-group id in the x dimension.
; Called in the entry block of both kernels in this module.
declare i32 @llvm.amdgcn.workgroup.id.x() #1

; Function Attrs: nounwind
;
; @myadd__kernel1 -- guarded element-wise "add 1.0", one element per invocation.
;   %0 : destination buffer passed as i8 addrspace(1)*, accessed as float
;   %1 : readonly source float buffer in addrspace(1)
;   %2 : i32 bound used by the range guard
; With base = workitem.id.x << 7 and idx = base + workgroup.id.x, the kernel
; stores %1[idx] + 1.0 into ((float*)%0)[idx] when workgroup.id.x < %2 - base;
; otherwise it returns without touching memory.
; NOTE(review): the work-item id supplies the stride-128 term and the
; work-group id the unit-stride term -- confirm this is the intended binding.
define amdgpu_kernel void @myadd__kernel1(i8 addrspace(1)* noalias nocapture, float addrspace(1)* noalias nocapture readonly, i32) local_unnamed_addr #0 {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %bid = tail call i32 @llvm.amdgcn.workgroup.id.x()
  %base = shl nsw i32 %tid, 7                       ; base = tid * 128
  %room = sub nsw i32 %2, %base                     ; elements left past base
  %in_range = icmp slt i32 %bid, %room
  br i1 %in_range, label %do_incr, label %done, !prof !0

do_incr:                                          ; preds = %entry
  %idx = add nsw i32 %base, %bid
  %idx64 = sext i32 %idx to i64
  ; Reinterpret the byte pointer as a float pointer before indexing.
  %dst.f32 = bitcast i8 addrspace(1)* %0 to float addrspace(1)*
  %src.ptr = getelementptr inbounds float, float addrspace(1)* %1, i64 %idx64
  %dst.ptr = getelementptr inbounds float, float addrspace(1)* %dst.f32, i64 %idx64
  %val = load float, float addrspace(1)* %src.ptr, align 4, !tbaa !8
  %incr = fadd float %val, 1.000000e+00
  store float %incr, float addrspace(1)* %dst.ptr, align 4, !tbaa !11
  br label %done

done:                                             ; preds = %do_incr, %entry
  ret void
}

; #0: attribute group referenced by both kernel definitions.
attributes #0 = { nounwind }
; #1: attribute group referenced by the two intrinsic declarations.
attributes #1 = { nounwind readnone speculatable }

; !0: branch weights attached via !prof to the guard branch of each kernel;
; the true successor is weighted 1048576 against 1 for the false successor.
!0 = !{!"branch_weights", i32 1048576, i32 1}
; TBAA access tags (!1, !5, !8, !11). Each tag is {base type, access type,
; offset 0} over a "float32" node whose parent is a node named by a hex string;
; every chain ends at the "tvm-tbaa" root (!4).
; NOTE(review): the hex strings look like per-buffer identifiers (possibly
; host pointer values from the generator) -- confirm against the producer.
; !1: tag on the load from %2 in @myadd__kernel0.
!1 = !{!2, !2, i64 0}
!2 = !{!"float32", !3, i64 0}
!3 = !{!"0x13bd690", !4, i64 0}
; !4: root of the TBAA tree.
!4 = !{!"tvm-tbaa"}
; !5: tag on the load from %1 in @myadd__kernel0.
!5 = !{!6, !6, i64 0}
!6 = !{!"float32", !7, i64 0}
!7 = !{!"0x13bc5b0", !4, i64 0}
; !8: tag on the store to %0 in @myadd__kernel0 and on the load from %1 in
; @myadd__kernel1.
!8 = !{!9, !9, i64 0}
!9 = !{!"float32", !10, i64 0}
!10 = !{!"0x13bd510", !4, i64 0}
; !11: tag on the store through %0 in @myadd__kernel1.
!11 = !{!12, !12, i64 0}
!12 = !{!"float32", !13, i64 0}
!13 = !{!"0x13bddd0", !4, i64 0}
	[23:36:46] src/codegen/llvm/codegen_amdgpu.cc:177: ; ModuleID = 'myadd__kernel0'
	; Source file name recorded for this module.
	source_filename = "myadd__kernel0"
	; Data layout: little-endian ("e"). Default-address-space pointers are 32-bit;
	; address spaces 1, 2 and 4 use 64-bit pointers, 3 and 5 use 32-bit pointers.
	; Native integer widths are 32 and 64 bits ("n32:64").
	target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
	; Target triple: amdgcn architecture, amd vendor, amdhsa OS, hcc environment.
	target triple = "amdgcn-amd-amdhsa-hcc"

	; Function Attrs: nounwind
	;
	; @myadd__kernel0 -- guarded element-wise float addition, one element per invocation.
	;   %0 : destination float buffer in addrspace(1)
	;   %1 : readonly source float buffer in addrspace(1)
	;   %2 : readonly source float buffer in addrspace(1)
	;   %3 : i32 bound used by the range guard
	; With base = workitem.id.x << 7 and idx = base + workgroup.id.x, the kernel
	; stores %2[idx] + %1[idx] into %0[idx] when workgroup.id.x < %3 - base;
	; otherwise it returns without touching memory.
	; NOTE(review): the work-item id supplies the stride-128 term and the
	; work-group id the unit-stride term -- confirm this is the intended binding.
	define amdgpu_kernel void @myadd__kernel0(float addrspace(1)* noalias nocapture, float addrspace(1)* noalias nocapture readonly, float addrspace(1)* noalias nocapture readonly, i32) local_unnamed_addr #0 {
	entry:
	%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
	%bid = tail call i32 @llvm.amdgcn.workgroup.id.x()
	%base = shl nsw i32 %tid, 7 ; base = tid * 128
	%room = sub nsw i32 %3, %base ; elements left past base
	%in_range = icmp slt i32 %bid, %room
	br i1 %in_range, label %do_add, label %done, !prof !0

	do_add: ; preds = %entry
	%idx = add nsw i32 %base, %bid
	%idx64 = sext i32 %idx to i64
	; Address computations for the three buffers share the same element index.
	%lhs.ptr = getelementptr inbounds float, float addrspace(1)* %2, i64 %idx64
	%rhs.ptr = getelementptr inbounds float, float addrspace(1)* %1, i64 %idx64
	%out.ptr = getelementptr inbounds float, float addrspace(1)* %0, i64 %idx64
	%lhs = load float, float addrspace(1)* %lhs.ptr, align 4, !tbaa !1
	%rhs = load float, float addrspace(1)* %rhs.ptr, align 4, !tbaa !5
	%sum = fadd float %lhs, %rhs
	store float %sum, float addrspace(1)* %out.ptr, align 4, !tbaa !8
	br label %done

	done: ; preds = %do_add, %entry
	ret void
	}

	; Function Attrs: nounwind readnone speculatable
	; AMDGPU intrinsic yielding the i32 work-item id in the x dimension.
	; Called in the entry block of both kernels in this module.
	declare i32 @llvm.amdgcn.workitem.id.x() #1

	; Function Attrs: nounwind readnone speculatable
	; AMDGPU intrinsic yielding the i32 work-group id in the x dimension.
	; Called in the entry block of both kernels in this module.
	declare i32 @llvm.amdgcn.workgroup.id.x() #1

	; Function Attrs: nounwind
	;
	; @myadd__kernel1 -- guarded element-wise "add 1.0", one element per invocation.
	;   %0 : destination buffer passed as i8 addrspace(1)*, accessed as float
	;   %1 : readonly source float buffer in addrspace(1)
	;   %2 : i32 bound used by the range guard
	; With base = workitem.id.x << 7 and idx = base + workgroup.id.x, the kernel
	; stores %1[idx] + 1.0 into ((float*)%0)[idx] when workgroup.id.x < %2 - base;
	; otherwise it returns without touching memory.
	; NOTE(review): the work-item id supplies the stride-128 term and the
	; work-group id the unit-stride term -- confirm this is the intended binding.
	define amdgpu_kernel void @myadd__kernel1(i8 addrspace(1)* noalias nocapture, float addrspace(1)* noalias nocapture readonly, i32) local_unnamed_addr #0 {
	entry:
	%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
	%bid = tail call i32 @llvm.amdgcn.workgroup.id.x()
	%base = shl nsw i32 %tid, 7 ; base = tid * 128
	%room = sub nsw i32 %2, %base ; elements left past base
	%in_range = icmp slt i32 %bid, %room
	br i1 %in_range, label %do_incr, label %done, !prof !0

	do_incr: ; preds = %entry
	%idx = add nsw i32 %base, %bid
	%idx64 = sext i32 %idx to i64
	; Reinterpret the byte pointer as a float pointer before indexing.
	%dst.f32 = bitcast i8 addrspace(1)* %0 to float addrspace(1)*
	%src.ptr = getelementptr inbounds float, float addrspace(1)* %1, i64 %idx64
	%dst.ptr = getelementptr inbounds float, float addrspace(1)* %dst.f32, i64 %idx64
	%val = load float, float addrspace(1)* %src.ptr, align 4, !tbaa !8
	%incr = fadd float %val, 1.000000e+00
	store float %incr, float addrspace(1)* %dst.ptr, align 4, !tbaa !11
	br label %done

	done: ; preds = %do_incr, %entry
	ret void
	}

	; #0: attribute group referenced by both kernel definitions.
	attributes #0 = { nounwind }
	; #1: attribute group referenced by the two intrinsic declarations.
	attributes #1 = { nounwind readnone speculatable }

	; !0: branch weights attached via !prof to the guard branch of each kernel;
	; the true successor is weighted 1048576 against 1 for the false successor.
	!0 = !{!"branch_weights", i32 1048576, i32 1}
	; TBAA access tags (!1, !5, !8, !11). Each tag is {base type, access type,
	; offset 0} over a "float32" node whose parent is a node named by a hex string;
	; every chain ends at the "tvm-tbaa" root (!4).
	; NOTE(review): the hex strings look like per-buffer identifiers (possibly
	; host pointer values from the generator) -- confirm against the producer.
	; !1: tag on the load from %2 in @myadd__kernel0.
	!1 = !{!2, !2, i64 0}
	!2 = !{!"float32", !3, i64 0}
	!3 = !{!"0x13bd690", !4, i64 0}
	; !4: root of the TBAA tree.
	!4 = !{!"tvm-tbaa"}
	; !5: tag on the load from %1 in @myadd__kernel0.
	!5 = !{!6, !6, i64 0}
	!6 = !{!"float32", !7, i64 0}
	!7 = !{!"0x13bc5b0", !4, i64 0}
	; !8: tag on the store to %0 in @myadd__kernel0 and on the load from %1 in
	; @myadd__kernel1.
	!8 = !{!9, !9, i64 0}
	!9 = !{!"float32", !10, i64 0}
	!10 = !{!"0x13bd510", !4, i64 0}
	; !11: tag on the store through %0 in @myadd__kernel1.
	!11 = !{!12, !12, i64 0}
	!12 = !{!"float32", !13, i64 0}
	!13 = !{!"0x13bddd0", !4, i64 0}