Skip to content

Instantly share code, notes, and snippets.

@cheshire
Last active February 7, 2020 23:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cheshire/17067d5ba4781817861c8b21d15c928d to your computer and use it in GitHub Desktop.
Save cheshire/17067d5ba4781817861c8b21d15c928d to your computer and use it in GitHub Desktop.
Input IR without optimizations
; ModuleID = '__compute_module'
source_filename = "__compute_module"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
; @0: 36 one-byte flags. The bytes equal to 1 sit at offsets 0, 7, 14, 21, 28
; and 35, i.e. on the diagonal when the array is read as 6 rows of 6 bytes.
; @EntryModule indexes it through a [2 x [3 x [2 x [3 x i8]]]] view and uses
; the low bit of each byte as a select condition. It is only ever accessed as
; i8, so no explicit alignment is required.
@0 = private unnamed_addr constant [36 x i8] c"\01\00\00\00\00\00\00\01\00\00\00\00\00\00\01\00\00\00\00\00\00\01\00\00\00\00\00\00\01\00\00\00\00\00\00\01"
; @1: eight zero bytes that @EntryModule reads back as a single double (+0.0).
; That load carries no `align` operand, so it assumes the ABI alignment of
; double (8 bytes). An [8 x i8] global with no explicit alignment gives no
; such guarantee, hence the explicit `align 8` here.
@1 = private unnamed_addr constant [8 x i8] zeroinitializer, align 8
; Function Attrs: uwtable
;
; EntryModule -- the single defined function of this module.
;
; Of the five parameters only %run_options (forwarded to the runtime call) and
; %buffer_table (array of buffer base pointers) are used in this body;
; %retval, %params and %prof_counters are never referenced here.
;
; Buffer-table slots as used below:
;   slot 0  written first as [2 x [2 x double]] (%p4), later rewritten as
;           [6 x [6 x double]] (%fusion); both views come from the same slot.
;   slot 1  read as [3 x [3 x double]] (%arg7_8).
;   slot 2  read as one double (%input0).
;   slot 3  [2 x [2 x double]] (%dot_81): handed to the matmul runtime call and
;           then read by the fusion loop.
;
; Steps:
;   1. Store the scalar at %input0 into every element of %p4 (2x2 loop nest).
;   2. Call @__xla_cpu_runtime_EigenMatMulF64(%run_options, %dot_81, %p4, %p4,
;      2, 2, 2, 1, 0). The callee is only declared in this module; presumably
;      it computes dot_81 = p4 x p4 -- confirm against the XLA CPU runtime.
;   3. For every (i, j) in a 6x6 loop nest:
;        fusion[i][j] = flag(@0, 6*i + j) ? dot_81[i/3][j/3] * arg7_8[i%3][j%3]
;                                         : <double loaded from @1>
;
; Loop counters live in allocas and are reloaded in each loop header; this is
; unoptimized IR.
define void @EntryModule(i8* %retval, i8* noalias %run_options, i8** noalias %params, i8** noalias %buffer_table, i64* noalias %prof_counters) #0 {
entry:
; Stack slots for the four loop counters (fusion nest and p4 nest).
%fusion.invar_address.dim.1 = alloca i64
%fusion.invar_address.dim.0 = alloca i64
%p4.invar_address.dim.1 = alloca i64
%p4.invar_address.dim.0 = alloca i64
; %input0 = buffer_table[2], viewed as a pointer to one double.
%0 = getelementptr inbounds i8*, i8** %buffer_table, i64 2
%1 = load i8*, i8** %0, !invariant.load !0, !dereferenceable !1, !align !1
%input0 = bitcast i8* %1 to double*
; %p4 = buffer_table[0], viewed as a 2x2 array of double.
%2 = getelementptr inbounds i8*, i8** %buffer_table, i64 0
%3 = load i8*, i8** %2, !invariant.load !0, !dereferenceable !2, !align !1
%p4 = bitcast i8* %3 to [2 x [2 x double]]*
; ---- Step 1: p4[i][j] = *input0 for i, j in [0, 2) ----
store i64 0, i64* %p4.invar_address.dim.0
br label %p4.loop_header.dim.0
p4.loop_header.dim.0: ; preds = %p4.loop_exit.dim.1, %entry
; Outer loop: leave once the dim-0 counter reaches 2.
%p4.indvar.dim.0 = load i64, i64* %p4.invar_address.dim.0
%4 = icmp uge i64 %p4.indvar.dim.0, 2
br i1 %4, label %p4.loop_exit.dim.0, label %p4.loop_body.dim.0
p4.loop_body.dim.0: ; preds = %p4.loop_header.dim.0
; Reset the inner counter for this row.
store i64 0, i64* %p4.invar_address.dim.1
br label %p4.loop_header.dim.1
p4.loop_header.dim.1: ; preds = %p4.loop_body.dim.1, %p4.loop_body.dim.0
; Inner loop: leave once the dim-1 counter reaches 2.
%p4.indvar.dim.1 = load i64, i64* %p4.invar_address.dim.1
%5 = icmp uge i64 %p4.indvar.dim.1, 2
br i1 %5, label %p4.loop_exit.dim.1, label %p4.loop_body.dim.1
p4.loop_body.dim.1: ; preds = %p4.loop_header.dim.1
; The scalar is reloaded on every iteration and stored to p4[dim0][dim1].
%6 = load double, double* %input0, !invariant.load !0, !noalias !3
%7 = getelementptr inbounds [2 x [2 x double]], [2 x [2 x double]]* %p4, i64 0, i64 %p4.indvar.dim.0, i64 %p4.indvar.dim.1
store double %6, double* %7, !alias.scope !3, !noalias !6
%invar.inc1 = add nuw nsw i64 %p4.indvar.dim.1, 1
store i64 %invar.inc1, i64* %p4.invar_address.dim.1
br label %p4.loop_header.dim.1
p4.loop_exit.dim.1: ; preds = %p4.loop_header.dim.1
; Advance the outer counter.
%invar.inc = add nuw nsw i64 %p4.indvar.dim.0, 1
store i64 %invar.inc, i64* %p4.invar_address.dim.0
br label %p4.loop_header.dim.0
p4.loop_exit.dim.0: ; preds = %p4.loop_header.dim.0
; ---- Step 2: runtime matmul call ----
; %dot_81 = buffer_table[3], viewed as a 2x2 array of double.
%8 = getelementptr inbounds i8*, i8** %buffer_table, i64 3
%9 = load i8*, i8** %8, !invariant.load !0, !dereferenceable !8, !align !1
%dot_81 = bitcast i8* %9 to [2 x [2 x double]]*
; %10 is the dot_81 buffer; %11 and %12 are both the p4 buffer.
%10 = bitcast [2 x [2 x double]]* %dot_81 to double*
%11 = bitcast [2 x [2 x double]]* %p4 to double*
%12 = bitcast [2 x [2 x double]]* %p4 to double*
; NOTE(review): the three i64 2 operands and the trailing i32 1 / i32 0 are
; presumably matrix dimensions and layout/transpose flags -- the callee's
; contract is not visible in this module; confirm against the runtime.
call void @__xla_cpu_runtime_EigenMatMulF64(i8* %run_options, double* %10, double* %11, double* %12, i64 2, i64 2, i64 2, i32 1, i32 0)
; ---- Step 3: fusion loop nest ----
; %arg7_8 = buffer_table[1], viewed as a 3x3 array of double.
%13 = getelementptr inbounds i8*, i8** %buffer_table, i64 1
%14 = load i8*, i8** %13, !invariant.load !0, !dereferenceable !9, !align !1
%arg7_8 = bitcast i8* %14 to [3 x [3 x double]]*
; %fusion = buffer_table[0] again, now viewed as a 6x6 array of double. This is
; the same slot %p4 was loaded from, so the fusion stores overwrite p4's data.
%15 = getelementptr inbounds i8*, i8** %buffer_table, i64 0
%16 = load i8*, i8** %15, !invariant.load !0, !dereferenceable !2, !align !1
%fusion = bitcast i8* %16 to [6 x [6 x double]]*
store i64 0, i64* %fusion.invar_address.dim.0
br label %fusion.loop_header.dim.0
fusion.loop_header.dim.0: ; preds = %fusion.loop_exit.dim.1, %p4.loop_exit.dim.0
; Outer loop over i: leave once the dim-0 counter reaches 6.
%fusion.indvar.dim.0 = load i64, i64* %fusion.invar_address.dim.0
%17 = icmp uge i64 %fusion.indvar.dim.0, 6
br i1 %17, label %fusion.loop_exit.dim.0, label %fusion.loop_body.dim.0
fusion.loop_body.dim.0: ; preds = %fusion.loop_header.dim.0
; Reset the inner counter for this row.
store i64 0, i64* %fusion.invar_address.dim.1
br label %fusion.loop_header.dim.1
fusion.loop_header.dim.1: ; preds = %fusion.loop_body.dim.1, %fusion.loop_body.dim.0
; Inner loop over j: leave once the dim-1 counter reaches 6.
%fusion.indvar.dim.1 = load i64, i64* %fusion.invar_address.dim.1
%18 = icmp uge i64 %fusion.indvar.dim.1, 6
br i1 %18, label %fusion.loop_exit.dim.1, label %fusion.loop_body.dim.1
fusion.loop_body.dim.1: ; preds = %fusion.loop_header.dim.1
; Column index j (the *1 and +0 are no-ops): %21 = j % 3, %22 = j / 3.
; %23 = (j / 3) / 2 is computed but has no user in this function.
%19 = mul nuw nsw i64 %fusion.indvar.dim.1, 1
%20 = add nuw nsw i64 0, %19
%21 = urem i64 %20, 3
%22 = udiv i64 %20, 3
%23 = udiv i64 %22, 2
; Row index i, same treatment: %26 = i % 3, %27 = i / 3.
; %28 = (i / 3) / 2 is likewise computed but has no user.
%24 = mul nuw nsw i64 %fusion.indvar.dim.0, 1
%25 = add nuw nsw i64 0, %24
%26 = urem i64 %25, 3
%27 = udiv i64 %25, 3
%28 = udiv i64 %27, 2
; Flag byte from @0 viewed as [2][3][2][3] at [i/3][i%3][j/3][j%3]; the byte
; offset is 18*(i/3) + 6*(i%3) + 3*(j/3) + (j%3) = 6*i + j.
%29 = getelementptr inbounds [2 x [3 x [2 x [3 x i8]]]], [2 x [3 x [2 x [3 x i8]]]]* bitcast ([36 x i8]* @0 to [2 x [3 x [2 x [3 x i8]]]]*), i64 0, i64 %27, i64 %26, i64 %22, i64 %21
%30 = load i8, i8* %29
; %32 = dot_81[i/3][j/3]
%31 = getelementptr inbounds [2 x [2 x double]], [2 x [2 x double]]* %dot_81, i64 0, i64 %27, i64 %22
%32 = load double, double* %31, !alias.scope !6, !noalias !10
; %34 = arg7_8[i%3][j%3]
%33 = getelementptr inbounds [3 x [3 x double]], [3 x [3 x double]]* %arg7_8, i64 0, i64 %26, i64 %21
%34 = load double, double* %33, !invariant.load !0, !noalias !12
%35 = fmul fast double %32, %34
; Fallback value: the eight bytes of @1 read as a double.
; NOTE(review): this load has no `align` operand, so it relies on @1 being at
; least 8-byte aligned.
%36 = load double, double* bitcast ([8 x i8]* @1 to double*)
; Keep only the low bit of the flag byte, then pick the product or the fallback.
%37 = trunc i8 %30 to i1
%38 = select fast i1 %37, double %35, double %36
%39 = getelementptr inbounds [6 x [6 x double]], [6 x [6 x double]]* %fusion, i64 0, i64 %fusion.indvar.dim.0, i64 %fusion.indvar.dim.1
store double %38, double* %39, !alias.scope !13, !noalias !6
%invar.inc3 = add nuw nsw i64 %fusion.indvar.dim.1, 1
store i64 %invar.inc3, i64* %fusion.invar_address.dim.1
br label %fusion.loop_header.dim.1
fusion.loop_exit.dim.1: ; preds = %fusion.loop_header.dim.1
; Advance the outer counter.
%invar.inc2 = add nuw nsw i64 %fusion.indvar.dim.0, 1
store i64 %invar.inc2, i64* %fusion.invar_address.dim.0
br label %fusion.loop_header.dim.0
fusion.loop_exit.dim.0: ; preds = %fusion.loop_header.dim.0
ret void
}
; Function Attrs: argmemonly nounwind
; External runtime routine called once from @EntryModule with
; (%run_options, three double* buffers, three i64 values, two i32 values).
; Only the declaration is present in this module; the implementation and the
; meaning of each operand live elsewhere.
declare void @__xla_cpu_runtime_EigenMatMulF64(i8*, double*, double*, double*, i64, i64, i64, i32, i32) #1
; #0 is the attribute group attached to @EntryModule.
attributes #0 = { uwtable "denormal-fp-math"="preserve-sign" "no-frame-pointer-elim"="false" }
; #1 is the attribute group attached to the runtime declaration above.
attributes #1 = { argmemonly nounwind }
; Metadata nodes referenced from @EntryModule.
; !0: empty node, the operand of every !invariant.load attachment.
!0 = !{}
; !1: the value 8. Used as the !align operand on every buffer-table load and as
; the !dereferenceable size on the slot-2 load.
!1 = !{i64 8}
; !2: the value 288, the !dereferenceable size on both slot-0 loads.
!2 = !{i64 288}
; !3: scope list {!4}. It is the !alias.scope of the p4 store and the !noalias
; of the %input0 load.
!3 = !{!4}
; !4: alias scope named "buffer: {index:0, offset:0, size:32}" in domain !5.
!4 = !{!"buffer: {index:0, offset:0, size:32}", !5}
; !5: the single alias-scope domain shared by every scope in this module.
!5 = !{!"XLA global AA domain"}
; !6: scope list {!7}. It is the !alias.scope of the dot_81 load and the
; !noalias of both the p4 store and the fusion store.
!6 = !{!7}
; !7: alias scope named "buffer: {index:3, offset:0, size:32}" in domain !5.
!7 = !{!"buffer: {index:3, offset:0, size:32}", !5}
; !8: the value 32, the !dereferenceable size on the slot-3 load.
!8 = !{i64 32}
; !9: the value 72, the !dereferenceable size on the slot-1 load.
!9 = !{i64 72}
; !10: scope list {!4, !11}, the !noalias of the dot_81 load.
!10 = !{!4, !11}
; !11: alias scope named "buffer: {index:0, offset:0, size:288}" in domain !5.
!11 = !{!"buffer: {index:0, offset:0, size:288}", !5}
; !12: scope list {!11, !7}, the !noalias of the arg7_8 load.
!12 = !{!11, !7}
; !13: scope list {!11}, the !alias.scope of the fusion store.
!13 = !{!11}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment