Created
October 10, 2017 14:39
-
-
Save masahi/2c81b07aaf2f2e58cd0a053fe3d1fb02 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[23:36:46] src/codegen/llvm/codegen_amdgpu.cc:177: ; ModuleID = 'myadd__kernel0' | |
; Module-level target description for the two kernels defined below.
; In the datalayout string, "e" selects little-endian, "p:32:32" makes
; default-address-space pointers 32 bits wide, and "p1:64:64" makes
; addrspace(1) pointers 64 bits wide. Both kernels take addrspace(1) pointers
; and index them with i64 offsets.
source_filename = "myadd__kernel0" | |
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" | |
target triple = "amdgcn-amd-amdhsa-hcc" | |
; Function Attrs: nounwind | |
; @myadd__kernel0: guarded element-wise float add, %0[i] = %2[i] + %1[i].
;   %0 - float addrspace(1)*, the only pointer that is stored through.
;   %1 - float addrspace(1)*, read-only operand (right-hand side of the fadd).
;   %2 - float addrspace(1)*, read-only operand (left-hand side of the fadd).
;   %3 - i32, exclusive upper bound on the element index i (presumably the
;        element count; confirm against the host-side launch code).
; Each invocation touches at most one element; the body contains no loop.
define amdgpu_kernel void @myadd__kernel0(float addrspace(1)* noalias nocapture, float addrspace(1)* noalias nocapture readonly, float addrspace(1)* noalias nocapture readonly, i32) local_unnamed_addr #0 { | |
entry: | |
; %4 = work-item id (x), %5 = work-group id (x).
%4 = tail call i32 @llvm.amdgcn.workitem.id.x() | |
%5 = tail call i32 @llvm.amdgcn.workgroup.id.x() | |
; %6 = %4 * 128 (left shift by 7). The work-item id is the scaled term of the
; index and the work-group id is the unit-stride term.
; NOTE(review): confirm this matches the intended schedule binding.
%6 = shl nsw i32 %4, 7 | |
; Bounds guard: %5 < %3 - %6 is a rearranged form of (%6 + %5) < %3, so the
; memory accesses in if_then only run for an in-range element index.
%7 = sub nsw i32 %3, %6 | |
%8 = icmp slt i32 %5, %7 | |
; !prof !0 weights the if_then edge 1048576 : 1 against the if_end edge.
br i1 %8, label %if_then, label %if_end, !prof !0 | |
if_then: ; preds = %entry | |
; i = %6 + %5, sign-extended to i64 for the getelementptr indexing below.
%9 = add nsw i32 %6, %5 | |
%10 = sext i32 %9 to i64 | |
; Load %2[i] and %1[i]; each access carries its own !tbaa tag (!1, !5).
%11 = getelementptr inbounds float, float addrspace(1)* %2, i64 %10 | |
%12 = load float, float addrspace(1)* %11, align 4, !tbaa !1 | |
%13 = getelementptr inbounds float, float addrspace(1)* %1, i64 %10 | |
%14 = load float, float addrspace(1)* %13, align 4, !tbaa !5 | |
%15 = fadd float %12, %14 | |
; Store the sum to %0[i] (tagged !tbaa !8).
%16 = getelementptr inbounds float, float addrspace(1)* %0, i64 %10 | |
store float %15, float addrspace(1)* %16, align 4, !tbaa !8 | |
br label %if_end | |
if_end: ; preds = %if_then, %entry | |
ret void | |
} | |
; AMDGPU intrinsics called at the top of both kernels in this module. Each
; takes no arguments and returns an i32: the x-dimension work-item id and the
; x-dimension work-group id respectively. Both use attribute group #1.
; Function Attrs: nounwind readnone speculatable | |
declare i32 @llvm.amdgcn.workitem.id.x() #1 | |
; Function Attrs: nounwind readnone speculatable | |
declare i32 @llvm.amdgcn.workgroup.id.x() #1 | |
; Function Attrs: nounwind | |
; @myadd__kernel1: guarded element-wise increment, %0[i] = %1[i] + 1.0.
;   %0 - i8 addrspace(1)*, destination; it is bitcast to float addrspace(1)*
;        before being indexed and stored through.
;   %1 - float addrspace(1)*, read-only input.
;   %2 - i32, exclusive upper bound on the element index i (presumably the
;        element count; confirm against the host-side launch code).
; Each invocation touches at most one element; the body contains no loop.
define amdgpu_kernel void @myadd__kernel1(i8 addrspace(1)* noalias nocapture, float addrspace(1)* noalias nocapture readonly, i32) local_unnamed_addr #0 { | |
entry: | |
; %3 = work-item id (x), %4 = work-group id (x).
%3 = tail call i32 @llvm.amdgcn.workitem.id.x() | |
%4 = tail call i32 @llvm.amdgcn.workgroup.id.x() | |
; %5 = %3 * 128 (left shift by 7); same index layout as @myadd__kernel0:
; work-item id scaled by 128 plus the work-group id.
%5 = shl nsw i32 %3, 7 | |
; Bounds guard: %4 < %2 - %5 is a rearranged form of (%5 + %4) < %2.
%6 = sub nsw i32 %2, %5 | |
%7 = icmp slt i32 %4, %6 | |
; !prof !0 weights the if_then edge 1048576 : 1 against the if_end edge.
br i1 %7, label %if_then, label %if_end, !prof !0 | |
if_then: ; preds = %entry | |
; i = %5 + %4, sign-extended to i64 for the getelementptr indexing below.
%8 = add nsw i32 %5, %4 | |
%9 = sext i32 %8 to i64 | |
; The load of %1[i] carries !tbaa !8, the same tag as the store in
; @myadd__kernel0 (presumably the same buffer on the compiler side; verify).
%10 = getelementptr inbounds float, float addrspace(1)* %1, i64 %9 | |
%11 = load float, float addrspace(1)* %10, align 4, !tbaa !8 | |
%12 = fadd float %11, 1.000000e+00 | |
; The destination arrives as i8*, so it is reinterpreted as float* first.
%13 = bitcast i8 addrspace(1)* %0 to float addrspace(1)* | |
%14 = getelementptr inbounds float, float addrspace(1)* %13, i64 %9 | |
store float %12, float addrspace(1)* %14, align 4, !tbaa !11 | |
br label %if_end | |
if_end: ; preds = %if_then, %entry | |
ret void | |
} | |
; Attribute groups: #0 is attached to both kernel definitions, #1 to the two
; intrinsic declarations.
attributes #0 = { nounwind } | |
attributes #1 = { nounwind readnone speculatable } | |
; !0: branch weights shared by the conditional branch in each kernel. The first
; weight applies to the first successor (if_then), the second to if_end.
!0 = !{!"branch_weights", i32 1048576, i32 1} | |
; TBAA metadata. !1, !5, !8 and !11 are the access tags referenced by the
; loads and stores. Each tag points at its own "float32" type node, whose
; parent is a node named by a hex string, and all of those hang off the
; shared root !4 ("tvm-tbaa").
; NOTE(review): the hex strings look like compiler-side pointer values that
; identify individual buffers; verify against the TVM code generator.
!1 = !{!2, !2, i64 0} | |
!2 = !{!"float32", !3, i64 0} | |
!3 = !{!"0x13bd690", !4, i64 0} | |
!4 = !{!"tvm-tbaa"} | |
!5 = !{!6, !6, i64 0} | |
!6 = !{!"float32", !7, i64 0} | |
!7 = !{!"0x13bc5b0", !4, i64 0} | |
!8 = !{!9, !9, i64 0} | |
!9 = !{!"float32", !10, i64 0} | |
!10 = !{!"0x13bd510", !4, i64 0} | |
!11 = !{!12, !12, i64 0} | |
!12 = !{!"float32", !13, i64 0} | |
!13 = !{!"0x13bddd0", !4, i64 0} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment