Skip to content

Instantly share code, notes, and snippets.

@masahi
Created October 10, 2017 14:39
Show Gist options
  • Save masahi/2c81b07aaf2f2e58cd0a053fe3d1fb02 to your computer and use it in GitHub Desktop.
Save masahi/2c81b07aaf2f2e58cd0a053fe3d1fb02 to your computer and use it in GitHub Desktop.
; [23:36:46] src/codegen/llvm/codegen_amdgpu.cc:177:
; ModuleID = 'myadd__kernel0'
source_filename = "myadd__kernel0"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn-amd-amdhsa-hcc"
; myadd__kernel0 -- guarded element-wise float add over three addrspace(1)
; buffers. In pseudo-C:
;
;   base = workitem.id.x << 7;                 // i.e. * 128
;   if (workgroup.id.x < n - base) {
;     idx      = base + workgroup.id.x;
;     out[idx] = lhs[idx] + rhs[idx];
;   }
;
; Arguments (positional; the value names are local to this definition):
;   %out  float addrspace(1)*  destination; only stored to (noalias nocapture)
;   %rhs  float addrspace(1)*  second fadd operand (noalias nocapture readonly)
;   %lhs  float addrspace(1)*  first fadd operand  (noalias nocapture readonly)
;   %n    i32                  bound used by the guard
;
; NOTE(review): the work-item id is the term scaled by 128 and the work-group
; id is the inner offset. That is the reverse of the usual
; "group * group_size + item" mapping -- confirm against the thread bindings
; of the schedule that produced this kernel.
;
; Function Attrs: nounwind
define amdgpu_kernel void @myadd__kernel0(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %rhs, float addrspace(1)* noalias nocapture readonly %lhs, i32 %n) local_unnamed_addr #0 {
entry:
  %wi.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %wg.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
  %base = shl nsw i32 %wi.x, 7
  ; Guard: wg.x < n - base, i.e. base + wg.x < n when nothing wraps.
  %limit = sub nsw i32 %n, %base
  %in.range = icmp slt i32 %wg.x, %limit
  br i1 %in.range, label %if_then, label %if_end, !prof !0

if_then:                                          ; preds = %entry
  %idx = add nsw i32 %base, %wg.x
  ; GEP indices are i64, so the 32-bit element index is sign-extended once
  ; and shared by all three address computations.
  %idx.ext = sext i32 %idx to i64
  %lhs.ptr = getelementptr inbounds float, float addrspace(1)* %lhs, i64 %idx.ext
  %lhs.val = load float, float addrspace(1)* %lhs.ptr, align 4, !tbaa !1
  %rhs.ptr = getelementptr inbounds float, float addrspace(1)* %rhs, i64 %idx.ext
  %rhs.val = load float, float addrspace(1)* %rhs.ptr, align 4, !tbaa !5
  %sum = fadd float %lhs.val, %rhs.val
  %out.ptr = getelementptr inbounds float, float addrspace(1)* %out, i64 %idx.ext
  store float %sum, float addrspace(1)* %out.ptr, align 4, !tbaa !8
  br label %if_end

if_end:                                           ; preds = %if_then, %entry
  ret void
}
; Target intrinsics called at the top of both kernels in this module. Each
; takes no arguments and returns an i32. Per the LLVM AMDGPU backend
; documentation, workitem.id.x is the x component of the work-item id within
; its work-group and workgroup.id.x is the x component of the work-group id.
; Attribute group #1 marks them nounwind readnone speculatable.
; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.workitem.id.x() #1
; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.workgroup.id.x() #1
; myadd__kernel1 -- guarded element-wise "add 1.0" from one addrspace(1)
; buffer into another. In pseudo-C:
;
;   base = workitem.id.x << 7;                 // i.e. * 128
;   if (workgroup.id.x < n - base) {
;     idx = base + workgroup.id.x;
;     ((float *)out)[idx] = src[idx] + 1.0f;
;   }
;
; Arguments (positional; the value names are local to this definition):
;   %out.raw  i8 addrspace(1)*     destination, passed as a byte pointer and
;                                  bitcast to float* before the store
;                                  (noalias nocapture)
;   %src      float addrspace(1)*  input buffer (noalias nocapture readonly)
;   %n        i32                  bound used by the guard
;
; NOTE(review): as in myadd__kernel0, the work-item id is the term scaled by
; 128 and the work-group id is the inner offset -- confirm this matches the
; intended thread bindings.
;
; Function Attrs: nounwind
define amdgpu_kernel void @myadd__kernel1(i8 addrspace(1)* noalias nocapture %out.raw, float addrspace(1)* noalias nocapture readonly %src, i32 %n) local_unnamed_addr #0 {
entry:
  %wi.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %wg.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
  %base = shl nsw i32 %wi.x, 7
  ; Guard: wg.x < n - base, i.e. base + wg.x < n when nothing wraps.
  %limit = sub nsw i32 %n, %base
  %in.range = icmp slt i32 %wg.x, %limit
  br i1 %in.range, label %if_then, label %if_end, !prof !0

if_then:                                          ; preds = %entry
  %idx = add nsw i32 %base, %wg.x
  %idx.ext = sext i32 %idx to i64
  %src.ptr = getelementptr inbounds float, float addrspace(1)* %src, i64 %idx.ext
  %src.val = load float, float addrspace(1)* %src.ptr, align 4, !tbaa !8
  %inc = fadd float %src.val, 1.000000e+00
  ; The destination arrives as i8*, so it is reinterpreted as float* before
  ; being indexed by element.
  %out.f32 = bitcast i8 addrspace(1)* %out.raw to float addrspace(1)*
  %out.ptr = getelementptr inbounds float, float addrspace(1)* %out.f32, i64 %idx.ext
  store float %inc, float addrspace(1)* %out.ptr, align 4, !tbaa !11
  br label %if_end

if_end:                                           ; preds = %if_then, %entry
  ret void
}
; Attribute groups: #0 is attached to both kernel definitions, #1 to the two
; intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }
; !0 is the !prof payload on the guard branch of both kernels. The weights are
; listed in successor order, so if_then gets 1048576 and if_end gets 1, i.e.
; the guarded body is marked as overwhelmingly likely.
!0 = !{!"branch_weights", i32 1048576, i32 1}
; TBAA tree rooted at !4 ("tvm-tbaa"). Under the root there are four sibling
; nodes named by hex strings (!3, !7, !10, !13). Each has one "float32" child
; (!2, !6, !9, !12), and each of those has an access tag of the form
; {base type, access type, offset 0} (!1, !5, !8, !11).
; Tag usage in this module:
;   !1   load through argument %2 in myadd__kernel0
;   !5   load through argument %1 in myadd__kernel0
;   !8   store through argument %0 in myadd__kernel0, and
;        load through argument %1 in myadd__kernel1
;   !11  store through argument %0 in myadd__kernel1
; NOTE(review): the hex node names look like host-side pointer values captured
; at code generation time (presumably one per buffer) -- confirm; if so they
; will differ from run to run.
!1 = !{!2, !2, i64 0}
!2 = !{!"float32", !3, i64 0}
!3 = !{!"0x13bd690", !4, i64 0}
!4 = !{!"tvm-tbaa"}
!5 = !{!6, !6, i64 0}
!6 = !{!"float32", !7, i64 0}
!7 = !{!"0x13bc5b0", !4, i64 0}
!8 = !{!9, !9, i64 0}
!9 = !{!"float32", !10, i64 0}
!10 = !{!"0x13bd510", !4, i64 0}
!11 = !{!12, !12, i64 0}
!12 = !{!"float32", !13, i64 0}
!13 = !{!"0x13bddd0", !4, i64 0}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment