Skip to content

Instantly share code, notes, and snippets.

@cheshire
Last active February 7, 2020 23:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cheshire/8bea1f36ab849f8945bc190b519272a6 to your computer and use it in GitHub Desktop.
Save cheshire/8bea1f36ab849f8945bc190b519272a6 to your computer and use it in GitHub Desktop.
Good input IR with optimizations
; ModuleID = '__compute_module'
source_filename = "__compute_module"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
; Function Attrs: nounwind uwtable
;
; @EntryModule: straight-line entry point (single basic block, no branches).
;
; Of the five parameters only %run_options and %buffer_table are used in the
; body; %retval, %params and %prof_counters are marked readnone and never
; referenced.
;
; Pointers read from %buffer_table:
;   slot 0 -> %3  : i8*, dereferenceable(288) (!2), align 8 -- written here
;   slot 1 -> %15 : [3 x [3 x double]]*, dereferenceable(72) (!9) -- read only
;   slot 2 -> %2  : i64*, dereferenceable(8) (!1) -- read only
;   slot 3 -> %10 : [2 x [2 x double]]*, dereferenceable(32) (!8) -- passed to
;                   the matmul runtime call and read afterwards
;
; After the runtime call, the stores/memsets below cover bytes 0..287 of the
; slot-0 buffer exactly once each. Non-zero values are stored at byte offsets
; 0, 56, 112, 168, 224 and 280 (every 7th double); everything else is zeroed.
; NOTE(review): viewed as a 6x6 row-major double matrix those six offsets would
; be the main diagonal -- presumably the diagonal of a Kronecker-style product
; of the 2x2 and 3x3 operands; confirm against the originating HLO.
define void @EntryModule(i8* nocapture readnone %retval, i8* noalias %run_options, i8** noalias nocapture readnone %params, i8** noalias nocapture readonly %buffer_table, i64* noalias nocapture readnone %prof_counters) local_unnamed_addr #0 {
entry:
; --- Load buffer_table[2] (as i64*) and buffer_table[0] (as i8*). ---
%0 = getelementptr inbounds i8*, i8** %buffer_table, i64 2
%1 = bitcast i8** %0 to i64**
%2 = load i64*, i64** %1, align 8, !invariant.load !0, !dereferenceable !1, !align !1
%3 = load i8*, i8** %buffer_table, align 8, !invariant.load !0, !dereferenceable !2, !align !1
; --- Read one i64 from slot 2, splat it to <4 x i64>, and store the 32-byte
; vector at offset 0 of the slot-0 buffer (alias scope !3). ---
%4 = load i64, i64* %2, align 8, !invariant.load !0, !noalias !3
%5 = insertelement <4 x i64> undef, i64 %4, i32 0
%6 = shufflevector <4 x i64> %5, <4 x i64> undef, <4 x i32> zeroinitializer
%7 = bitcast i8* %3 to <4 x i64>*
store <4 x i64> %6, <4 x i64>* %7, align 8, !alias.scope !3, !noalias !6
; --- Load buffer_table[3] as a 2x2 double array; %11 points at its [0][0]. ---
%8 = getelementptr inbounds i8*, i8** %buffer_table, i64 3
%9 = bitcast i8** %8 to [2 x [2 x double]]**
%10 = load [2 x [2 x double]]*, [2 x [2 x double]]** %9, align 8, !invariant.load !0, !dereferenceable !8, !align !1
%11 = getelementptr inbounds [2 x [2 x double]], [2 x [2 x double]]* %10, i64 0, i64 0, i64 0
%12 = bitcast i8* %3 to double*
; Runtime matmul call: receives %run_options, %11 (slot-3 array), and %12
; (slot-0 buffer viewed as double*) twice, followed by i64 2, 2, 2 and i32 1, 0.
; NOTE(review): argument roles are not visible in this file -- presumably
; (run_options, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); confirm
; against the XLA CPU runtime declaration.
tail call void @__xla_cpu_runtime_EigenMatMulF64(i8* %run_options, double* %11, double* %12, double* %12, i64 2, i64 2, i64 2, i32 1, i32 0)
; --- Load buffer_table[1] as a 3x3 double array. ---
%13 = getelementptr inbounds i8*, i8** %buffer_table, i64 1
%14 = bitcast i8** %13 to [3 x [3 x double]]**
%15 = load [3 x [3 x double]]*, [3 x [3 x double]]** %14, align 8, !invariant.load !0, !dereferenceable !9, !align !1
; %16 = slot-3 element [0][0], read after the runtime call above.
%16 = load double, double* %11, align 8, !alias.scope !6, !noalias !10
; %18 = slot-1 element [0][0].
%17 = getelementptr inbounds [3 x [3 x double]], [3 x [3 x double]]* %15, i64 0, i64 0, i64 0
%18 = load double, double* %17, align 8, !invariant.load !0, !noalias !12
; Bytes 0..7: %18 * %16.
%19 = fmul fast double %18, %16
; %20 is the same bitcast of %3 as %12 above.
%20 = bitcast i8* %3 to double*
store double %19, double* %20, align 8, !alias.scope !13, !noalias !6
; Bytes 8..23: zero (<2 x double>).
%21 = getelementptr inbounds i8, i8* %3, i64 8
%22 = bitcast i8* %21 to <2 x double>*
store <2 x double> zeroinitializer, <2 x double>* %22, align 8, !alias.scope !13, !noalias !6
; Bytes 24..47: zero (memset, 24 bytes).
%23 = getelementptr inbounds i8, i8* %3, i64 24
call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(24) %23, i8 0, i64 24, i1 false)
; Bytes 48..55: zero (scalar double store).
%24 = getelementptr inbounds i8, i8* %3, i64 48
%25 = bitcast i8* %24 to double*
store double 0.000000e+00, double* %25, align 8, !alias.scope !13, !noalias !6
; Bytes 56..63: slot-1 element [1][1] (%27) * %16.
%26 = getelementptr inbounds [3 x [3 x double]], [3 x [3 x double]]* %15, i64 0, i64 1, i64 1
%27 = load double, double* %26, align 8, !invariant.load !0, !noalias !12
%28 = fmul fast double %27, %16
%29 = getelementptr inbounds i8, i8* %3, i64 56
%30 = bitcast i8* %29 to double*
store double %28, double* %30, align 8, !alias.scope !13, !noalias !6
; Bytes 64..95: zero (memset, 32 bytes).
%31 = getelementptr inbounds i8, i8* %3, i64 64
call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(32) %31, i8 0, i64 32, i1 false)
; Bytes 96..111: zero (<2 x double>).
%32 = getelementptr inbounds i8, i8* %3, i64 96
%33 = bitcast i8* %32 to <2 x double>*
store <2 x double> zeroinitializer, <2 x double>* %33, align 8, !alias.scope !13, !noalias !6
; Bytes 112..119: slot-1 element [2][2] (%35) * %16.
%34 = getelementptr inbounds [3 x [3 x double]], [3 x [3 x double]]* %15, i64 0, i64 2, i64 2
%35 = load double, double* %34, align 8, !invariant.load !0, !noalias !12
%36 = fmul fast double %35, %16
%37 = getelementptr inbounds i8, i8* %3, i64 112
%38 = bitcast i8* %37 to double*
store double %36, double* %38, align 8, !alias.scope !13, !noalias !6
; Bytes 120..143: zero (memset, 24 bytes).
%39 = getelementptr inbounds i8, i8* %3, i64 120
call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(24) %39, i8 0, i64 24, i1 false)
; Bytes 144..167: zero (memset, 24 bytes). %41 points at slot-3 element [1][1].
%40 = getelementptr inbounds i8, i8* %3, i64 144
%41 = getelementptr inbounds [2 x [2 x double]], [2 x [2 x double]]* %10, i64 0, i64 1, i64 1
call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(24) %40, i8 0, i64 24, i1 false)
; %42 = slot-3 element [1][1]; it scales the remaining three non-zero entries.
%42 = load double, double* %41, align 8, !alias.scope !6, !noalias !10
; Bytes 168..175: %18 (slot-1 [0][0]) * %42.
%43 = fmul fast double %18, %42
%44 = getelementptr inbounds i8, i8* %3, i64 168
%45 = bitcast i8* %44 to double*
store double %43, double* %45, align 8, !alias.scope !13, !noalias !6
; Bytes 176..223: zero (memset, 48 bytes); bytes 224..231: %27 (slot-1 [1][1]) * %42.
%46 = getelementptr inbounds i8, i8* %3, i64 176
%47 = fmul fast double %27, %42
%48 = getelementptr inbounds i8, i8* %3, i64 224
%49 = bitcast i8* %48 to double*
call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) %46, i8 0, i64 48, i1 false)
store double %47, double* %49, align 8, !alias.scope !13, !noalias !6
; Bytes 232..279: zero (memset, 48 bytes); bytes 280..287: %35 (slot-1 [2][2]) * %42.
%50 = getelementptr inbounds i8, i8* %3, i64 232
%51 = fmul fast double %35, %42
%52 = getelementptr inbounds i8, i8* %3, i64 280
%53 = bitcast i8* %52 to double*
call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) %50, i8 0, i64 48, i1 false)
store double %51, double* %53, align 8, !alias.scope !13, !noalias !6
ret void
}
; Function Attrs: argmemonly nounwind
; External matmul routine, called once from @EntryModule. Only the declaration
; is visible in this module.
; NOTE(review): parameter roles are presumably (run_options, out, lhs, rhs,
; m, n, k, transpose_lhs, transpose_rhs) -- confirm against the XLA CPU runtime.
declare void @__xla_cpu_runtime_EigenMatMulF64(i8*, double*, double*, double*, i64, i64, i64, i32, i32) local_unnamed_addr #1
; Function Attrs: argmemonly nounwind willreturn writeonly
; LLVM memset intrinsic (destination pointer, fill byte, byte count, volatile
; flag). @EntryModule uses it to zero-fill ranges of the slot-0 buffer.
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
; Attribute groups: #0 is attached to @EntryModule, #1 to the matmul runtime
; declaration, #2 to the memset intrinsic declaration.
attributes #0 = { nounwind uwtable "denormal-fp-math"="preserve-sign" "no-frame-pointer-elim"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { argmemonly nounwind willreturn writeonly }
; !0: empty node, used as the operand of every !invariant.load.
!0 = !{}
; !1: the value 8 -- used as the !align operand on the buffer-pointer loads
; and as the !dereferenceable size of the buffer_table[2] pointer.
!1 = !{i64 8}
; !2: !dereferenceable size (288 bytes) of the buffer_table[0] pointer.
!2 = !{i64 288}
; !3: scope list { !4 }; alias scope of the initial <4 x i64> store.
!3 = !{!4}
; !4: alias scope named for buffer index 0, size 32, in domain !5.
!4 = !{!"buffer: {index:0, offset:0, size:32}", !5}
; !5: the single alias-analysis domain that all scopes here belong to.
!5 = !{!"XLA global AA domain"}
; !6: scope list { !7 }; alias scope of the loads from the buffer_table[3]
; array and the !noalias list on the stores into the buffer_table[0] buffer.
!6 = !{!7}
; !7: alias scope named for buffer index 3, size 32, in domain !5.
!7 = !{!"buffer: {index:3, offset:0, size:32}", !5}
; !8: !dereferenceable size (32 bytes) of the buffer_table[3] pointer.
!8 = !{i64 32}
; !9: !dereferenceable size (72 bytes) of the buffer_table[1] pointer.
!9 = !{i64 72}
; !10: scope list { !4, !11 }; !noalias list on the loads from buffer_table[3].
!10 = !{!4, !11}
; !11: alias scope named for buffer index 0, size 288, in domain !5.
!11 = !{!"buffer: {index:0, offset:0, size:288}", !5}
; !12: scope list { !11, !7 }; !noalias list on the loads from buffer_table[1].
!12 = !{!11, !7}
; !13: scope list { !11 }; alias scope of the result stores into buffer_table[0].
!13 = !{!11}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment