Skip to content

Instantly share code, notes, and snippets.

@cheshire
Created February 6, 2020 23:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cheshire/9dff84d8fbb83278736278854c746d8d to your computer and use it in GitHub Desktop.
Save cheshire/9dff84d8fbb83278736278854c746d8d to your computer and use it in GitHub Desktop.
no optimizations, with gemm
; ModuleID = '__compute_module'
source_filename = "__compute_module"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
; Private literal constants, stored as raw byte arrays and reinterpreted at each
; use site through a constant bitcast (float*, double* or i64*). The datalayout
; above starts with "e" (little-endian), so the bytes are read low byte first.
;
; @0: eight zero bytes; loaded as a double (0.0) to seed the reduce.322
; accumulators in @EntryModule.
@0 = private unnamed_addr constant [8 x i8] zeroinitializer, align 8
; @1..@8: literals loaded inside the fusion.3 loop body of @EntryModule.
;   @1 = float -1.0 (0xBF800000)      @2 = float 2.0 (0x40000000)
;   @3 = float 0.0                    @4 = double 1.0 (0x3FF0000000000000)
;   @5 = double 0x3FDA827999FCEF34 ~= 0.41421356 (sqrt(2) - 1 at double precision)
;   @6 = i64 -3                       @7 = i64 0
;   @8 = double 0.0
@1 = private unnamed_addr constant [4 x i8] c"\00\00\80\BF"
@2 = private unnamed_addr constant [4 x i8] c"\00\00\00@"
@3 = private unnamed_addr constant [4 x i8] zeroinitializer
@4 = private unnamed_addr constant [8 x i8] c"\00\00\00\00\00\00\F0?"
@5 = private unnamed_addr constant [8 x i8] c"4\EF\FC\99y\82\DA?"
@6 = private unnamed_addr constant [8 x i8] c"\FD\FF\FF\FF\FF\FF\FF\FF"
@7 = private unnamed_addr constant [8 x i8] zeroinitializer
@8 = private unnamed_addr constant [8 x i8] zeroinitializer
; @9..@16: the same set of literals for the fusion.5 loop body; the only byte
; difference from @1..@8 is @14 = i64 -2 (where @6 is i64 -3).
@9 = private unnamed_addr constant [4 x i8] c"\00\00\80\BF"
@10 = private unnamed_addr constant [4 x i8] c"\00\00\00@"
@11 = private unnamed_addr constant [4 x i8] zeroinitializer
@12 = private unnamed_addr constant [8 x i8] c"\00\00\00\00\00\00\F0?"
@13 = private unnamed_addr constant [8 x i8] c"4\EF\FC\99y\82\DA?"
@14 = private unnamed_addr constant [8 x i8] c"\FE\FF\FF\FF\FF\FF\FF\FF"
@15 = private unnamed_addr constant [8 x i8] zeroinitializer
@16 = private unnamed_addr constant [8 x i8] zeroinitializer
; @17..@22: literals loaded inside the fusion loop body:
;   @17 = float -1.0, @18 = float 2.0, @19 = float 0.0,
;   @20 = double 1.0, @21 = double ~0.41421356, @22 = double 0.0.
@17 = private unnamed_addr constant [4 x i8] c"\00\00\80\BF"
@18 = private unnamed_addr constant [4 x i8] c"\00\00\00@"
@19 = private unnamed_addr constant [4 x i8] zeroinitializer
@20 = private unnamed_addr constant [8 x i8] c"\00\00\00\00\00\00\F0?"
@21 = private unnamed_addr constant [8 x i8] c"4\EF\FC\99y\82\DA?"
@22 = private unnamed_addr constant [8 x i8] zeroinitializer
; @23..@29: byte patterns repeat the ones above (zeros, -1.0f, 2.0f, 0.0f, 1.0,
; ~0.41421356). NOTE(review): their uses and load types are not in the reviewed
; part of the file - presumably a later fusion in @EntryModule; confirm there.
@23 = private unnamed_addr constant [8 x i8] zeroinitializer
@24 = private unnamed_addr constant [8 x i8] zeroinitializer
@25 = private unnamed_addr constant [4 x i8] c"\00\00\80\BF"
@26 = private unnamed_addr constant [4 x i8] c"\00\00\00@"
@27 = private unnamed_addr constant [4 x i8] zeroinitializer
@28 = private unnamed_addr constant [8 x i8] c"\00\00\00\00\00\00\F0?"
@29 = private unnamed_addr constant [8 x i8] c"4\EF\FC\99y\82\DA?"
; Function Attrs: uwtable
; Scalar f64 add helper: writes *params[0] + *params[1] through %retval.
; %run_options, %buffer_table and %prof_counters are accepted but never
; referenced in this body.
define internal void @add_F64.98(i8* %retval, i8* noalias %run_options, i8** noalias %params, i8** noalias %buffer_table, i64* noalias %prof_counters) #0 {
entry:
  ; Stack slot that holds the sum before it is copied out to the caller.
  %sum.slot = alloca double, align 8

  ; Pull the two operand pointers out of the parameter array.
  %lhs.entry = getelementptr inbounds i8*, i8** %params, i64 0
  %rhs.entry = getelementptr inbounds i8*, i8** %params, i64 1
  %lhs.raw = load i8*, i8** %lhs.entry, !dereferenceable !0, !align !0
  %rhs.raw = load i8*, i8** %rhs.entry, !dereferenceable !0, !align !0

  ; View the untyped buffers (operands and result) as doubles.
  %lhs.f64 = bitcast i8* %lhs.raw to double*
  %rhs.f64 = bitcast i8* %rhs.raw to double*
  %out.f64 = bitcast i8* %retval to double*

  ; Read both operands and add them; the `fast` flag enables all fast-math
  ; relaxations for this fadd.
  %lhs.val = load double, double* %lhs.f64, !alias.scope !1, !noalias !4
  %rhs.val = load double, double* %rhs.f64, !alias.scope !6, !noalias !4
  %sum = fadd fast double %lhs.val, %rhs.val

  ; Round-trip through the stack slot, then publish the result.
  store double %sum, double* %sum.slot, !alias.scope !4
  %result = load double, double* %sum.slot
  store double %result, double* %out.f64
  ret void
}
; Function Attrs: uwtable
; Scalar f64 add helper: writes *params[0] + *params[1] through %retval.
; %run_options, %buffer_table and %prof_counters are accepted but never
; referenced in this body.
define internal void @add_F64.181(i8* %retval, i8* noalias %run_options, i8** noalias %params, i8** noalias %buffer_table, i64* noalias %prof_counters) #0 {
entry:
  ; Stack slot that holds the sum before it is copied out to the caller.
  %sum.slot = alloca double, align 8

  ; Pull the two operand pointers out of the parameter array.
  %lhs.entry = getelementptr inbounds i8*, i8** %params, i64 0
  %rhs.entry = getelementptr inbounds i8*, i8** %params, i64 1
  %lhs.raw = load i8*, i8** %lhs.entry, !dereferenceable !0, !align !0
  %rhs.raw = load i8*, i8** %rhs.entry, !dereferenceable !0, !align !0

  ; View the untyped buffers (operands and result) as doubles.
  %lhs.f64 = bitcast i8* %lhs.raw to double*
  %rhs.f64 = bitcast i8* %rhs.raw to double*
  %out.f64 = bitcast i8* %retval to double*

  ; Read both operands and add them; the `fast` flag enables all fast-math
  ; relaxations for this fadd.
  %lhs.val = load double, double* %lhs.f64, !alias.scope !8, !noalias !10
  %rhs.val = load double, double* %rhs.f64, !alias.scope !12, !noalias !10
  %sum = fadd fast double %lhs.val, %rhs.val

  ; Round-trip through the stack slot, then publish the result.
  store double %sum, double* %sum.slot, !alias.scope !10
  %result = load double, double* %sum.slot
  store double %result, double* %out.f64
  ret void
}
; Function Attrs: uwtable
; Scalar f64 add helper: writes *params[0] + *params[1] through %retval.
; %run_options, %buffer_table and %prof_counters are accepted but never
; referenced in this body.
define internal void @add_F64.318(i8* %retval, i8* noalias %run_options, i8** noalias %params, i8** noalias %buffer_table, i64* noalias %prof_counters) #0 {
entry:
  ; Stack slot that holds the sum before it is copied out to the caller.
  %sum.slot = alloca double, align 8

  ; Pull the two operand pointers out of the parameter array.
  %lhs.entry = getelementptr inbounds i8*, i8** %params, i64 0
  %rhs.entry = getelementptr inbounds i8*, i8** %params, i64 1
  %lhs.raw = load i8*, i8** %lhs.entry, !dereferenceable !0, !align !0
  %rhs.raw = load i8*, i8** %rhs.entry, !dereferenceable !0, !align !0

  ; View the untyped buffers (operands and result) as doubles.
  %lhs.f64 = bitcast i8* %lhs.raw to double*
  %rhs.f64 = bitcast i8* %rhs.raw to double*
  %out.f64 = bitcast i8* %retval to double*

  ; Read both operands and add them; the `fast` flag enables all fast-math
  ; relaxations for this fadd.
  %lhs.val = load double, double* %lhs.f64, !alias.scope !14, !noalias !16
  %rhs.val = load double, double* %rhs.f64, !alias.scope !18, !noalias !16
  %sum = fadd fast double %lhs.val, %rhs.val

  ; Round-trip through the stack slot, then publish the result.
  store double %sum, double* %sum.slot, !alias.scope !16
  %result = load double, double* %sum.slot
  store double %result, double* %out.f64
  ret void
}
; Function Attrs: uwtable
define void @EntryModule(i8* %retval, i8* noalias %run_options, i8** noalias %params, i8** noalias %buffer_table, i64* noalias %prof_counters) #0 {
entry:
%fusion.1.invar_address.dim.1 = alloca i64
%fusion.1.invar_address.dim.0 = alloca i64
%fusion.4.vectorized_inner.invar_address.reduction_dim.1 = alloca i64
%accumulator32 = alloca <2 x double>
%reduce.102.invar_address.dim.0 = alloca i64
%fusion.4.invar_address.dim.2 = alloca i64
%fusion.4.invar_address.dim.1 = alloca i64
%fusion.4.invar_address.dim.0 = alloca i64
%fusion.2.vectorized_inner.invar_address.reduction_dim.1 = alloca i64
%accumulator24 = alloca <2 x double>
%accumulator23 = alloca double
%reduce.185.invar_address.dim.0 = alloca i64
%fusion.2.invar_address.dim.2 = alloca i64
%fusion.2.invar_address.dim.1 = alloca i64
%fusion.2.invar_address.dim.0 = alloca i64
%fusion.vectorized_inner.invar_address.reduction_dim.1 = alloca i64
%accumulator15 = alloca <4 x double>
%accumulator = alloca <2 x double>
%reduce.322.invar_address.dim.0 = alloca i64
%fusion.invar_address.dim.2 = alloca i64
%fusion.invar_address.dim.1 = alloca i64
%fusion.invar_address.dim.0 = alloca i64
%accum_address10 = alloca double
%dot.invar_address.reduction = alloca i64
%dot.invar_address.rhs.0 = alloca i64
%dot.invar_address.lhs.0 = alloca i64
%fusion.5.invar_address.dim.1 = alloca i64
%fusion.5.invar_address.dim.0 = alloca i64
%accum_address = alloca double
%dot.1.invar_address.reduction = alloca i64
%dot.1.invar_address.rhs.0 = alloca i64
%dot.1.invar_address.lhs.0 = alloca i64
%fusion.3.invar_address.dim.1 = alloca i64
%fusion.3.invar_address.dim.0 = alloca i64
%0 = getelementptr inbounds i8*, i8** %buffer_table, i64 4
%1 = load i8*, i8** %0, !invariant.load !20, !dereferenceable !21, !align !0
%arg0.1 = bitcast i8* %1 to [3 x [1 x [1 x double]]]*
%2 = getelementptr inbounds i8*, i8** %buffer_table, i64 9
%3 = load i8*, i8** %2, !invariant.load !20, !dereferenceable !22, !align !0
%arg6.7 = bitcast i8* %3 to [1 x [3 x float]]*
%4 = getelementptr inbounds i8*, i8** %buffer_table, i64 5
%5 = load i8*, i8** %4, !invariant.load !20, !dereferenceable !21, !align !0
%arg7.8 = bitcast i8* %5 to [1 x [3 x double]]*
%6 = getelementptr inbounds i8*, i8** %buffer_table, i64 2
%7 = load i8*, i8** %6, !invariant.load !20, !dereferenceable !23, !align !0
%arg8.9 = bitcast i8* %7 to [1 x [3 x [3 x double]]]*
%8 = getelementptr inbounds i8*, i8** %buffer_table, i64 1
%9 = load i8*, i8** %8, !invariant.load !20, !dereferenceable !24, !align !0
%fusion.3 = bitcast i8* %9 to [3 x [3 x double]]*
store i64 0, i64* %fusion.3.invar_address.dim.0
br label %fusion.3.loop_header.dim.0
fusion.3.loop_header.dim.0: ; preds = %fusion.3.loop_exit.dim.1, %entry
%fusion.3.indvar.dim.0 = load i64, i64* %fusion.3.invar_address.dim.0
%10 = icmp uge i64 %fusion.3.indvar.dim.0, 3
br i1 %10, label %fusion.3.loop_exit.dim.0, label %fusion.3.loop_body.dim.0
fusion.3.loop_body.dim.0: ; preds = %fusion.3.loop_header.dim.0
store i64 0, i64* %fusion.3.invar_address.dim.1
br label %fusion.3.loop_header.dim.1
fusion.3.loop_header.dim.1: ; preds = %fusion.3.loop_body.dim.1, %fusion.3.loop_body.dim.0
%fusion.3.indvar.dim.1 = load i64, i64* %fusion.3.invar_address.dim.1
%11 = icmp uge i64 %fusion.3.indvar.dim.1, 3
br i1 %11, label %fusion.3.loop_exit.dim.1, label %fusion.3.loop_body.dim.1
fusion.3.loop_body.dim.1: ; preds = %fusion.3.loop_header.dim.1
%12 = mul nuw nsw i64 %fusion.3.indvar.dim.1, 1
%13 = add nuw nsw i64 0, %12
%14 = udiv i64 %13, 3
%15 = mul nuw nsw i64 %fusion.3.indvar.dim.0, 1
%16 = add nuw nsw i64 0, %15
%17 = udiv i64 %16, 3
%18 = mul nuw nsw i64 %13, 1
%19 = add nuw nsw i64 0, %18
%20 = udiv i64 %19, 3
%21 = mul nuw nsw i64 %16, 1
%22 = add nuw nsw i64 0, %21
%23 = udiv i64 %22, 3
%24 = mul nuw nsw i64 %22, 1
%25 = add nuw nsw i64 0, %24
%26 = trunc i64 %25 to i32
%27 = mul nuw nsw i64 %19, 1
%28 = add nuw nsw i64 0, %27
%29 = trunc i64 %28 to i32
%30 = icmp eq i32 %26, %29
%31 = zext i1 %30 to i8
%32 = mul nuw nsw i64 %13, 1
%33 = add nuw nsw i64 0, %32
%34 = udiv i64 %33, 3
%35 = load float, float* bitcast ([4 x i8]* @1 to float*)
%36 = load float, float* bitcast ([4 x i8]* @2 to float*)
%37 = getelementptr inbounds [1 x [3 x float]], [1 x [3 x float]]* %arg6.7, i64 0, i64 0, i64 %33
%38 = load float, float* %37, !invariant.load !20, !noalias !25
%39 = fmul fast float %36, %38
%40 = fadd fast float %35, %39
%41 = fcmp fast une float %40, %40
%42 = zext i1 %41 to i8
%43 = load float, float* bitcast ([4 x i8]* @3 to float*)
%44 = fcmp fast one float %40, 0.000000e+00
%45 = uitofp i1 %44 to float
%46 = call fast float @llvm.copysign.f32(float %45, float %40)
%47 = fcmp fast uno float %40, %40
%48 = select fast i1 %47, float %40, float %46
%49 = trunc i8 %42 to i1
%50 = select fast i1 %49, float %43, float %48
%51 = fpext float %50 to double
%52 = load double, double* bitcast ([8 x i8]* @4 to double*)
%53 = load double, double* bitcast ([8 x i8]* @5 to double*)
%54 = getelementptr inbounds [1 x [3 x double]], [1 x [3 x double]]* %arg7.8, i64 0, i64 0, i64 %33
%55 = load double, double* %54, !invariant.load !20, !noalias !25
%56 = fmul fast double %53, %55
%57 = fadd fast double %52, %56
%58 = fmul fast double %51, %57
%59 = mul nuw nsw i64 %13, 1
%60 = add nuw nsw i64 0, %59
%61 = udiv i64 %60, 3
%62 = mul nuw nsw i64 %16, 1
%63 = add nuw nsw i64 0, %62
%64 = udiv i64 %63, 3
%65 = load i64, i64* bitcast ([8 x i8]* @6 to i64*)
%66 = mul nuw nsw i64 %60, 1
%67 = add nuw nsw i64 0, %66
%68 = mul nuw nsw i64 %63, 1
%69 = add nuw nsw i64 0, %68
%70 = sub i64 %67, %69
%71 = icmp sle i64 %65, %70
%72 = zext i1 %71 to i8
%73 = load i64, i64* bitcast ([8 x i8]* @7 to i64*)
%74 = icmp sle i64 %70, %73
%75 = zext i1 %74 to i8
%76 = and i8 %72, %75
%77 = getelementptr inbounds [1 x [3 x [3 x double]]], [1 x [3 x [3 x double]]]* %arg8.9, i64 0, i64 0, i64 %16, i64 %13
%78 = load double, double* %77, !invariant.load !20, !noalias !25
%79 = load double, double* bitcast ([8 x i8]* @8 to double*)
%80 = trunc i8 %76 to i1
%81 = select fast i1 %80, double %78, double %79
%82 = trunc i8 %31 to i1
%83 = select fast i1 %82, double %58, double %81
%84 = getelementptr inbounds [3 x [3 x double]], [3 x [3 x double]]* %fusion.3, i64 0, i64 %fusion.3.indvar.dim.0, i64 %fusion.3.indvar.dim.1
store double %83, double* %84, !alias.scope !25, !noalias !27
%invar.inc1 = add nuw nsw i64 %fusion.3.indvar.dim.1, 1
store i64 %invar.inc1, i64* %fusion.3.invar_address.dim.1
br label %fusion.3.loop_header.dim.1
fusion.3.loop_exit.dim.1: ; preds = %fusion.3.loop_header.dim.1
%invar.inc = add nuw nsw i64 %fusion.3.indvar.dim.0, 1
store i64 %invar.inc, i64* %fusion.3.invar_address.dim.0
br label %fusion.3.loop_header.dim.0
fusion.3.loop_exit.dim.0: ; preds = %fusion.3.loop_header.dim.0
%85 = getelementptr inbounds i8*, i8** %buffer_table, i64 0
%86 = load i8*, i8** %85, !invariant.load !20, !dereferenceable !24, !align !0
%dot.1 = bitcast i8* %86 to [3 x [3 x double]]*
store i64 0, i64* %dot.1.invar_address.lhs.0
br label %dot.1.loop_header.lhs.0
dot.1.loop_header.lhs.0: ; preds = %dot.1.loop_exit.rhs.0, %fusion.3.loop_exit.dim.0
%dot.1.indvar.lhs.0 = load i64, i64* %dot.1.invar_address.lhs.0
%87 = icmp uge i64 %dot.1.indvar.lhs.0, 3
br i1 %87, label %dot.1.loop_exit.lhs.0, label %dot.1.loop_body.lhs.0
dot.1.loop_body.lhs.0: ; preds = %dot.1.loop_header.lhs.0
store i64 0, i64* %dot.1.invar_address.rhs.0
br label %dot.1.loop_header.rhs.0
dot.1.loop_header.rhs.0: ; preds = %dot.1.loop_exit.reduction, %dot.1.loop_body.lhs.0
%dot.1.indvar.rhs.0 = load i64, i64* %dot.1.invar_address.rhs.0
%88 = icmp uge i64 %dot.1.indvar.rhs.0, 3
br i1 %88, label %dot.1.loop_exit.rhs.0, label %dot.1.loop_body.rhs.0
dot.1.loop_body.rhs.0: ; preds = %dot.1.loop_header.rhs.0
store i64 0, i64* %dot.1.invar_address.reduction
store double 0.000000e+00, double* %accum_address
br label %dot.1.loop_header.reduction
dot.1.loop_header.reduction: ; preds = %dot.1.loop_body.reduction, %dot.1.loop_body.rhs.0
%dot.1.indvar.reduction = load i64, i64* %dot.1.invar_address.reduction
%89 = icmp uge i64 %dot.1.indvar.reduction, 3
br i1 %89, label %dot.1.loop_exit.reduction, label %dot.1.loop_body.reduction
dot.1.loop_body.reduction: ; preds = %dot.1.loop_header.reduction
%90 = getelementptr inbounds [3 x [3 x double]], [3 x [3 x double]]* %fusion.3, i64 0, i64 %dot.1.indvar.lhs.0, i64 %dot.1.indvar.reduction
%91 = load double, double* %90, !alias.scope !25, !noalias !27
%92 = getelementptr inbounds [3 x [3 x double]], [3 x [3 x double]]* %fusion.3, i64 0, i64 %dot.1.indvar.rhs.0, i64 %dot.1.indvar.reduction
%93 = load double, double* %92, !alias.scope !25, !noalias !27
%94 = load double, double* %accum_address
%95 = fmul fast double %91, %93
%96 = fadd fast double %94, %95
store double %96, double* %accum_address
%invar.inc4 = add nuw nsw i64 %dot.1.indvar.reduction, 1
store i64 %invar.inc4, i64* %dot.1.invar_address.reduction
br label %dot.1.loop_header.reduction, !llvm.loop !29
dot.1.loop_exit.reduction: ; preds = %dot.1.loop_header.reduction
%97 = load double, double* %accum_address
%98 = getelementptr inbounds [3 x [3 x double]], [3 x [3 x double]]* %dot.1, i64 0, i64 %dot.1.indvar.lhs.0, i64 %dot.1.indvar.rhs.0
store double %97, double* %98, !alias.scope !27, !noalias !31
%invar.inc3 = add nuw nsw i64 %dot.1.indvar.rhs.0, 1
store i64 %invar.inc3, i64* %dot.1.invar_address.rhs.0
br label %dot.1.loop_header.rhs.0
dot.1.loop_exit.rhs.0: ; preds = %dot.1.loop_header.rhs.0
%invar.inc2 = add nuw nsw i64 %dot.1.indvar.lhs.0, 1
store i64 %invar.inc2, i64* %dot.1.invar_address.lhs.0
br label %dot.1.loop_header.lhs.0
dot.1.loop_exit.lhs.0: ; preds = %dot.1.loop_header.lhs.0
%99 = getelementptr inbounds i8*, i8** %buffer_table, i64 3
%100 = load i8*, i8** %99, !invariant.load !20, !dereferenceable !35, !align !0
%arg1.2 = bitcast i8* %100 to [1 x [2 x [2 x double]]]*
%101 = getelementptr inbounds i8*, i8** %buffer_table, i64 12
%102 = load i8*, i8** %101, !invariant.load !20, !dereferenceable !0, !align !0
%arg3.4 = bitcast i8* %102 to [1 x [2 x float]]*
%103 = getelementptr inbounds i8*, i8** %buffer_table, i64 8
%104 = load i8*, i8** %103, !invariant.load !20, !dereferenceable !36, !align !0
%arg5.6 = bitcast i8* %104 to [1 x [2 x double]]*
%105 = getelementptr inbounds i8*, i8** %buffer_table, i64 1
%106 = load i8*, i8** %105, !invariant.load !20, !dereferenceable !24, !align !0
%fusion.5 = bitcast i8* %106 to [2 x [2 x double]]*
store i64 0, i64* %fusion.5.invar_address.dim.0
br label %fusion.5.loop_header.dim.0
fusion.5.loop_header.dim.0: ; preds = %fusion.5.loop_exit.dim.1, %dot.1.loop_exit.lhs.0
%fusion.5.indvar.dim.0 = load i64, i64* %fusion.5.invar_address.dim.0
%107 = icmp uge i64 %fusion.5.indvar.dim.0, 2
br i1 %107, label %fusion.5.loop_exit.dim.0, label %fusion.5.loop_body.dim.0
fusion.5.loop_body.dim.0: ; preds = %fusion.5.loop_header.dim.0
store i64 0, i64* %fusion.5.invar_address.dim.1
br label %fusion.5.loop_header.dim.1
fusion.5.loop_header.dim.1: ; preds = %fusion.5.loop_body.dim.1, %fusion.5.loop_body.dim.0
%fusion.5.indvar.dim.1 = load i64, i64* %fusion.5.invar_address.dim.1
%108 = icmp uge i64 %fusion.5.indvar.dim.1, 2
br i1 %108, label %fusion.5.loop_exit.dim.1, label %fusion.5.loop_body.dim.1
fusion.5.loop_body.dim.1: ; preds = %fusion.5.loop_header.dim.1
%109 = mul nuw nsw i64 %fusion.5.indvar.dim.1, 1
%110 = add nuw nsw i64 0, %109
%111 = udiv i64 %110, 2
%112 = mul nuw nsw i64 %fusion.5.indvar.dim.0, 1
%113 = add nuw nsw i64 0, %112
%114 = udiv i64 %113, 2
%115 = mul nuw nsw i64 %110, 1
%116 = add nuw nsw i64 0, %115
%117 = udiv i64 %116, 2
%118 = mul nuw nsw i64 %113, 1
%119 = add nuw nsw i64 0, %118
%120 = udiv i64 %119, 2
%121 = mul nuw nsw i64 %119, 1
%122 = add nuw nsw i64 0, %121
%123 = trunc i64 %122 to i32
%124 = mul nuw nsw i64 %116, 1
%125 = add nuw nsw i64 0, %124
%126 = trunc i64 %125 to i32
%127 = icmp eq i32 %123, %126
%128 = zext i1 %127 to i8
%129 = mul nuw nsw i64 %110, 1
%130 = add nuw nsw i64 0, %129
%131 = udiv i64 %130, 2
%132 = load float, float* bitcast ([4 x i8]* @9 to float*)
%133 = load float, float* bitcast ([4 x i8]* @10 to float*)
%134 = getelementptr inbounds [1 x [2 x float]], [1 x [2 x float]]* %arg3.4, i64 0, i64 0, i64 %130
%135 = load float, float* %134, !invariant.load !20, !noalias !37
%136 = fmul fast float %133, %135
%137 = fadd fast float %132, %136
%138 = fcmp fast une float %137, %137
%139 = zext i1 %138 to i8
%140 = load float, float* bitcast ([4 x i8]* @11 to float*)
%141 = fcmp fast one float %137, 0.000000e+00
%142 = uitofp i1 %141 to float
%143 = call fast float @llvm.copysign.f32(float %142, float %137)
%144 = fcmp fast uno float %137, %137
%145 = select fast i1 %144, float %137, float %143
%146 = trunc i8 %139 to i1
%147 = select fast i1 %146, float %140, float %145
%148 = fpext float %147 to double
%149 = load double, double* bitcast ([8 x i8]* @12 to double*)
%150 = load double, double* bitcast ([8 x i8]* @13 to double*)
%151 = getelementptr inbounds [1 x [2 x double]], [1 x [2 x double]]* %arg5.6, i64 0, i64 0, i64 %130
%152 = load double, double* %151, !invariant.load !20, !noalias !37
%153 = fmul fast double %150, %152
%154 = fadd fast double %149, %153
%155 = fmul fast double %148, %154
%156 = mul nuw nsw i64 %110, 1
%157 = add nuw nsw i64 0, %156
%158 = udiv i64 %157, 2
%159 = mul nuw nsw i64 %113, 1
%160 = add nuw nsw i64 0, %159
%161 = udiv i64 %160, 2
%162 = load i64, i64* bitcast ([8 x i8]* @14 to i64*)
%163 = mul nuw nsw i64 %157, 1
%164 = add nuw nsw i64 0, %163
%165 = mul nuw nsw i64 %160, 1
%166 = add nuw nsw i64 0, %165
%167 = sub i64 %164, %166
%168 = icmp sle i64 %162, %167
%169 = zext i1 %168 to i8
%170 = load i64, i64* bitcast ([8 x i8]* @15 to i64*)
%171 = icmp sle i64 %167, %170
%172 = zext i1 %171 to i8
%173 = and i8 %169, %172
%174 = getelementptr inbounds [1 x [2 x [2 x double]]], [1 x [2 x [2 x double]]]* %arg1.2, i64 0, i64 0, i64 %113, i64 %110
%175 = load double, double* %174, !invariant.load !20, !noalias !37
%176 = load double, double* bitcast ([8 x i8]* @16 to double*)
%177 = trunc i8 %173 to i1
%178 = select fast i1 %177, double %175, double %176
%179 = trunc i8 %128 to i1
%180 = select fast i1 %179, double %155, double %178
%181 = getelementptr inbounds [2 x [2 x double]], [2 x [2 x double]]* %fusion.5, i64 0, i64 %fusion.5.indvar.dim.0, i64 %fusion.5.indvar.dim.1
store double %180, double* %181, !alias.scope !37, !noalias !39
%invar.inc6 = add nuw nsw i64 %fusion.5.indvar.dim.1, 1
store i64 %invar.inc6, i64* %fusion.5.invar_address.dim.1
br label %fusion.5.loop_header.dim.1
fusion.5.loop_exit.dim.1: ; preds = %fusion.5.loop_header.dim.1
%invar.inc5 = add nuw nsw i64 %fusion.5.indvar.dim.0, 1
store i64 %invar.inc5, i64* %fusion.5.invar_address.dim.0
br label %fusion.5.loop_header.dim.0
fusion.5.loop_exit.dim.0: ; preds = %fusion.5.loop_header.dim.0
%182 = getelementptr inbounds i8*, i8** %buffer_table, i64 22
%183 = load i8*, i8** %182, !invariant.load !20, !dereferenceable !40, !align !36
%184 = getelementptr inbounds i8, i8* %183, i64 864
%dot = bitcast i8* %184 to [2 x [2 x double]]*
store i64 0, i64* %dot.invar_address.lhs.0
br label %dot.loop_header.lhs.0
dot.loop_header.lhs.0: ; preds = %dot.loop_exit.rhs.0, %fusion.5.loop_exit.dim.0
%dot.indvar.lhs.0 = load i64, i64* %dot.invar_address.lhs.0
%185 = icmp uge i64 %dot.indvar.lhs.0, 2
br i1 %185, label %dot.loop_exit.lhs.0, label %dot.loop_body.lhs.0
dot.loop_body.lhs.0: ; preds = %dot.loop_header.lhs.0
store i64 0, i64* %dot.invar_address.rhs.0
br label %dot.loop_header.rhs.0
dot.loop_header.rhs.0: ; preds = %dot.loop_exit.reduction, %dot.loop_body.lhs.0
%dot.indvar.rhs.0 = load i64, i64* %dot.invar_address.rhs.0
%186 = icmp uge i64 %dot.indvar.rhs.0, 2
br i1 %186, label %dot.loop_exit.rhs.0, label %dot.loop_body.rhs.0
dot.loop_body.rhs.0: ; preds = %dot.loop_header.rhs.0
store i64 0, i64* %dot.invar_address.reduction
store double 0.000000e+00, double* %accum_address10
br label %dot.loop_header.reduction
dot.loop_header.reduction: ; preds = %dot.loop_body.reduction, %dot.loop_body.rhs.0
%dot.indvar.reduction = load i64, i64* %dot.invar_address.reduction
%187 = icmp uge i64 %dot.indvar.reduction, 2
br i1 %187, label %dot.loop_exit.reduction, label %dot.loop_body.reduction
dot.loop_body.reduction: ; preds = %dot.loop_header.reduction
%188 = getelementptr inbounds [2 x [2 x double]], [2 x [2 x double]]* %fusion.5, i64 0, i64 %dot.indvar.lhs.0, i64 %dot.indvar.reduction
%189 = load double, double* %188, !alias.scope !37, !noalias !39
%190 = getelementptr inbounds [2 x [2 x double]], [2 x [2 x double]]* %fusion.5, i64 0, i64 %dot.indvar.rhs.0, i64 %dot.indvar.reduction
%191 = load double, double* %190, !alias.scope !37, !noalias !39
%192 = load double, double* %accum_address10
%193 = fmul fast double %189, %191
%194 = fadd fast double %192, %193
store double %194, double* %accum_address10
%invar.inc9 = add nuw nsw i64 %dot.indvar.reduction, 1
store i64 %invar.inc9, i64* %dot.invar_address.reduction
br label %dot.loop_header.reduction, !llvm.loop !41
dot.loop_exit.reduction: ; preds = %dot.loop_header.reduction
%195 = load double, double* %accum_address10
%196 = getelementptr inbounds [2 x [2 x double]], [2 x [2 x double]]* %dot, i64 0, i64 %dot.indvar.lhs.0, i64 %dot.indvar.rhs.0
store double %195, double* %196, !alias.scope !39, !noalias !42
%invar.inc8 = add nuw nsw i64 %dot.indvar.rhs.0, 1
store i64 %invar.inc8, i64* %dot.invar_address.rhs.0
br label %dot.loop_header.rhs.0
dot.loop_exit.rhs.0: ; preds = %dot.loop_header.rhs.0
%invar.inc7 = add nuw nsw i64 %dot.indvar.lhs.0, 1
store i64 %invar.inc7, i64* %dot.invar_address.lhs.0
br label %dot.loop_header.lhs.0
dot.loop_exit.lhs.0: ; preds = %dot.loop_header.lhs.0
%197 = getelementptr inbounds i8*, i8** %buffer_table, i64 10
%198 = load i8*, i8** %197, !invariant.load !20, !dereferenceable !22, !align !0
%arg2.3 = bitcast i8* %198 to [3 x [1 x float]]*
%199 = getelementptr inbounds i8*, i8** %buffer_table, i64 6
%200 = load i8*, i8** %199, !invariant.load !20, !dereferenceable !21, !align !0
%arg4.5 = bitcast i8* %200 to [3 x [1 x double]]*
%201 = getelementptr inbounds i8*, i8** %buffer_table, i64 22
%202 = load i8*, i8** %201, !invariant.load !20, !dereferenceable !40, !align !36
%fusion = bitcast i8* %202 to [3 x [6 x [6 x double]]]*
store i64 0, i64* %fusion.invar_address.dim.0
br label %fusion.loop_header.dim.0
fusion.loop_header.dim.0: ; preds = %fusion.loop_exit.dim.1, %dot.loop_exit.lhs.0
%fusion.indvar.dim.0 = load i64, i64* %fusion.invar_address.dim.0
%203 = icmp uge i64 %fusion.indvar.dim.0, 3
br i1 %203, label %fusion.loop_exit.dim.0, label %fusion.loop_body.dim.0
fusion.loop_body.dim.0: ; preds = %fusion.loop_header.dim.0
store i64 0, i64* %fusion.invar_address.dim.1
br label %fusion.loop_header.dim.1
fusion.loop_header.dim.1: ; preds = %fusion.loop_exit.dim.2, %fusion.loop_body.dim.0
%fusion.indvar.dim.1 = load i64, i64* %fusion.invar_address.dim.1
%204 = icmp uge i64 %fusion.indvar.dim.1, 6
br i1 %204, label %fusion.loop_exit.dim.1, label %fusion.loop_body.dim.1
fusion.loop_body.dim.1: ; preds = %fusion.loop_header.dim.1
store i64 0, i64* %fusion.invar_address.dim.2
br label %fusion.loop_header.dim.2
fusion.loop_header.dim.2: ; preds = %fusion.loop_body.dim.2, %fusion.loop_body.dim.1
%fusion.indvar.dim.2 = load i64, i64* %fusion.invar_address.dim.2
%205 = icmp uge i64 %fusion.indvar.dim.2, 6
br i1 %205, label %fusion.loop_exit.dim.2, label %fusion.loop_body.dim.2
fusion.loop_body.dim.2: ; preds = %fusion.loop_header.dim.2
%206 = mul nuw nsw i64 %fusion.indvar.dim.1, 1
%207 = add nuw nsw i64 0, %206
%208 = trunc i64 %207 to i32
%209 = mul nuw nsw i64 %fusion.indvar.dim.2, 1
%210 = add nuw nsw i64 0, %209
%211 = trunc i64 %210 to i32
%212 = icmp eq i32 %208, %211
%213 = zext i1 %212 to i8
%214 = mul nuw nsw i64 %fusion.indvar.dim.2, 1
%215 = add nuw nsw i64 0, %214
%216 = urem i64 %215, 3
%217 = udiv i64 %215, 3
%218 = udiv i64 %217, 2
%219 = mul nuw nsw i64 %fusion.indvar.dim.1, 1
%220 = add nuw nsw i64 0, %219
%221 = urem i64 %220, 3
%222 = udiv i64 %220, 3
%223 = udiv i64 %222, 2
%224 = mul nuw nsw i64 %fusion.indvar.dim.0, 1
%225 = add nuw nsw i64 0, %224
%226 = udiv i64 %225, 3
%227 = mul nuw nsw i64 %217, 1
%228 = add nuw nsw i64 0, %227
%229 = udiv i64 %228, 2
%230 = mul nuw nsw i64 %222, 1
%231 = add nuw nsw i64 0, %230
%232 = udiv i64 %231, 2
%233 = mul nuw nsw i64 %225, 1
%234 = add nuw nsw i64 0, %233
%235 = udiv i64 %234, 3
%236 = mul nuw nsw i64 %234, 1
%237 = add nuw nsw i64 0, %236
%238 = udiv i64 %237, 3
%239 = load float, float* bitcast ([4 x i8]* @17 to float*)
%240 = getelementptr inbounds [3 x [1 x float]], [3 x [1 x float]]* %arg2.3, i64 0, i64 %237, i64 0
%241 = load float, float* %240, !invariant.load !20, !noalias !44
%242 = load float, float* bitcast ([4 x i8]* @18 to float*)
%243 = fmul fast float %241, %242
%244 = fadd fast float %239, %243
%245 = fcmp fast une float %244, %244
%246 = zext i1 %245 to i8
%247 = load float, float* bitcast ([4 x i8]* @19 to float*)
%248 = fcmp fast one float %244, 0.000000e+00
%249 = uitofp i1 %248 to float
%250 = call fast float @llvm.copysign.f32(float %249, float %244)
%251 = fcmp fast uno float %244, %244
%252 = select fast i1 %251, float %244, float %250
%253 = trunc i8 %246 to i1
%254 = select fast i1 %253, float %247, float %252
%255 = fpext float %254 to double
%256 = load double, double* bitcast ([8 x i8]* @20 to double*)
%257 = getelementptr inbounds [3 x [1 x double]], [3 x [1 x double]]* %arg4.5, i64 0, i64 %237, i64 0
%258 = load double, double* %257, !invariant.load !20, !noalias !44
%259 = load double, double* bitcast ([8 x i8]* @21 to double*)
%260 = fmul fast double %258, %259
%261 = fadd fast double %256, %260
%262 = fmul fast double %255, %261
%263 = fmul fast double %262, %262
%264 = getelementptr inbounds [2 x [2 x double]], [2 x [2 x double]]* %dot, i64 0, i64 %231, i64 %228
%265 = load double, double* %264, !alias.scope !39, !noalias !42
%266 = fmul fast double %263, %265
%267 = getelementptr inbounds [3 x [3 x double]], [3 x [3 x double]]* %dot.1, i64 0, i64 %221, i64 %216
%268 = load double, double* %267, !alias.scope !27, !noalias !31
%269 = fmul fast double %266, %268
%270 = load double, double* bitcast ([8 x i8]* @22 to double*)
%271 = trunc i8 %213 to i1
%272 = select fast i1 %271, double %269, double %270
%273 = getelementptr inbounds [3 x [6 x [6 x double]]], [3 x [6 x [6 x double]]]* %fusion, i64 0, i64 %fusion.indvar.dim.0, i64 %fusion.indvar.dim.1, i64 %fusion.indvar.dim.2
store double %272, double* %273, !alias.scope !48, !noalias !49
%invar.inc13 = add nuw nsw i64 %fusion.indvar.dim.2, 1
store i64 %invar.inc13, i64* %fusion.invar_address.dim.2
br label %fusion.loop_header.dim.2
fusion.loop_exit.dim.2: ; preds = %fusion.loop_header.dim.2
%invar.inc12 = add nuw nsw i64 %fusion.indvar.dim.1, 1
store i64 %invar.inc12, i64* %fusion.invar_address.dim.1
br label %fusion.loop_header.dim.1
fusion.loop_exit.dim.1: ; preds = %fusion.loop_header.dim.1
%invar.inc11 = add nuw nsw i64 %fusion.indvar.dim.0, 1
store i64 %invar.inc11, i64* %fusion.invar_address.dim.0
br label %fusion.loop_header.dim.0
fusion.loop_exit.dim.0: ; preds = %fusion.loop_header.dim.0
%274 = getelementptr inbounds i8*, i8** %buffer_table, i64 1
%275 = load i8*, i8** %274, !invariant.load !20, !dereferenceable !24, !align !0
%reduce.322 = bitcast i8* %275 to [3 x [6 x double]]*
store i64 0, i64* %reduce.322.invar_address.dim.0
br label %reduce.322.loop_header.dim.0
reduce.322.loop_header.dim.0: ; preds = %fusion.vectorized_inner.loop_exit.reduction_dim.1, %fusion.loop_exit.dim.0
%reduce.322.indvar.dim.0 = load i64, i64* %reduce.322.invar_address.dim.0
%276 = icmp uge i64 %reduce.322.indvar.dim.0, 3
br i1 %276, label %reduce.322.loop_exit.dim.0, label %reduce.322.loop_body.dim.0
reduce.322.loop_body.dim.0: ; preds = %reduce.322.loop_header.dim.0
%277 = load double, double* bitcast ([8 x i8]* @0 to double*)
%.splatinsert = insertelement <2 x double> undef, double %277, i32 0
%.splat = shufflevector <2 x double> %.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer
store <2 x double> %.splat, <2 x double>* %accumulator, align 8
%.splatinsert16 = insertelement <4 x double> undef, double %277, i32 0
%.splat17 = shufflevector <4 x double> %.splatinsert16, <4 x double> undef, <4 x i32> zeroinitializer
store <4 x double> %.splat17, <4 x double>* %accumulator15, align 8
store i64 0, i64* %fusion.vectorized_inner.invar_address.reduction_dim.1
br label %fusion.vectorized_inner.loop_header.reduction_dim.1
fusion.vectorized_inner.loop_header.reduction_dim.1: ; preds = %fusion.vectorized_inner.loop_body.reduction_dim.1, %reduce.322.loop_body.dim.0
%fusion.vectorized_inner.indvar.reduction_dim.1 = load i64, i64* %fusion.vectorized_inner.invar_address.reduction_dim.1
%278 = icmp uge i64 %fusion.vectorized_inner.indvar.reduction_dim.1, 6
br i1 %278, label %fusion.vectorized_inner.loop_exit.reduction_dim.1, label %fusion.vectorized_inner.loop_body.reduction_dim.1
fusion.vectorized_inner.loop_body.reduction_dim.1: ; preds = %fusion.vectorized_inner.loop_header.reduction_dim.1
%279 = getelementptr inbounds [3 x [6 x [6 x double]]], [3 x [6 x [6 x double]]]* %fusion, i64 0, i64 %reduce.322.indvar.dim.0, i64 %fusion.vectorized_inner.indvar.reduction_dim.1, i64 0
%280 = bitcast double* %279 to i8*
%281 = bitcast i8* %280 to <2 x double>*
%282 = load <2 x double>, <2 x double>* %accumulator, align 8
%283 = load <2 x double>, <2 x double>* %281, align 8, !alias.scope !48, !noalias !49
%284 = fadd fast <2 x double> %282, %283
store <2 x double> %284, <2 x double>* %accumulator, align 8
%285 = getelementptr inbounds <2 x double>, <2 x double>* %281, i32 1
%286 = bitcast <2 x double>* %285 to <4 x double>*
%287 = load <4 x double>, <4 x double>* %accumulator15, align 8
%288 = load <4 x double>, <4 x double>* %286, align 8, !alias.scope !48, !noalias !49
%289 = fadd fast <4 x double> %287, %288
store <4 x double> %289, <4 x double>* %accumulator15, align 8
%invar.inc18 = add nuw nsw i64 %fusion.vectorized_inner.indvar.reduction_dim.1, 1
store i64 %invar.inc18, i64* %fusion.vectorized_inner.invar_address.reduction_dim.1
br label %fusion.vectorized_inner.loop_header.reduction_dim.1
fusion.vectorized_inner.loop_exit.reduction_dim.1: ; preds = %fusion.vectorized_inner.loop_header.reduction_dim.1
%290 = load <2 x double>, <2 x double>* %accumulator, align 8
%291 = load <4 x double>, <4 x double>* %accumulator15, align 8
%292 = getelementptr inbounds [3 x [6 x double]], [3 x [6 x double]]* %reduce.322, i64 0, i64 %reduce.322.indvar.dim.0, i64 0
%293 = bitcast double* %292 to <2 x double>*
store <2 x double> %290, <2 x double>* %293, align 8, !alias.scope !52, !noalias !53
%294 = getelementptr inbounds <2 x double>, <2 x double>* %293, i32 1
%295 = bitcast <2 x double>* %294 to <4 x double>*
store <4 x double> %291, <4 x double>* %295, align 8, !alias.scope !52, !noalias !53
%invar.inc14 = add nuw nsw i64 %reduce.322.indvar.dim.0, 1
store i64 %invar.inc14, i64* %reduce.322.invar_address.dim.0
br label %reduce.322.loop_header.dim.0
reduce.322.loop_exit.dim.0: ; preds = %reduce.322.loop_header.dim.0
%296 = getelementptr inbounds i8*, i8** %buffer_table, i64 22
%297 = load i8*, i8** %296, !invariant.load !20, !dereferenceable !40, !align !36
%fusion.2 = bitcast i8* %297 to [1 x [3 x [3 x double]]]*
store i64 0, i64* %fusion.2.invar_address.dim.0
br label %fusion.2.loop_header.dim.0
fusion.2.loop_header.dim.0: ; preds = %fusion.2.loop_exit.dim.1, %reduce.322.loop_exit.dim.0
%fusion.2.indvar.dim.0 = load i64, i64* %fusion.2.invar_address.dim.0
%298 = icmp uge i64 %fusion.2.indvar.dim.0, 1
br i1 %298, label %fusion.2.loop_exit.dim.0, label %fusion.2.loop_body.dim.0
fusion.2.loop_body.dim.0: ; preds = %fusion.2.loop_header.dim.0
store i64 0, i64* %fusion.2.invar_address.dim.1
br label %fusion.2.loop_header.dim.1
fusion.2.loop_header.dim.1: ; preds = %fusion.2.loop_exit.dim.2, %fusion.2.loop_body.dim.0
%fusion.2.indvar.dim.1 = load i64, i64* %fusion.2.invar_address.dim.1
%299 = icmp uge i64 %fusion.2.indvar.dim.1, 3
br i1 %299, label %fusion.2.loop_exit.dim.1, label %fusion.2.loop_body.dim.1
fusion.2.loop_body.dim.1: ; preds = %fusion.2.loop_header.dim.1
store i64 0, i64* %fusion.2.invar_address.dim.2
br label %fusion.2.loop_header.dim.2
fusion.2.loop_header.dim.2: ; preds = %fusion.2.loop_body.dim.2, %fusion.2.loop_body.dim.1
%fusion.2.indvar.dim.2 = load i64, i64* %fusion.2.invar_address.dim.2
%300 = icmp uge i64 %fusion.2.indvar.dim.2, 3
br i1 %300, label %fusion.2.loop_exit.dim.2, label %fusion.2.loop_body.dim.2
fusion.2.loop_body.dim.2: ; preds = %fusion.2.loop_header.dim.2
%301 = mul nuw nsw i64 %fusion.2.indvar.dim.2, 1
%302 = add nuw nsw i64 0, %301
%303 = udiv i64 %302, 3
%304 = mul nuw nsw i64 %fusion.2.indvar.dim.1, 1
%305 = add nuw nsw i64 0, %304
%306 = udiv i64 %305, 3
%307 = mul nuw nsw i64 %fusion.2.indvar.dim.0, 1
%308 = add nuw nsw i64 0, %307
%309 = mul nuw nsw i64 %305, 1
%310 = add nuw nsw i64 0, %309
%311 = trunc i64 %310 to i32
%312 = mul nuw nsw i64 %302, 1
%313 = add nuw nsw i64 0, %312
%314 = trunc i64 %313 to i32
%315 = icmp eq i32 %311, %314
%316 = zext i1 %315 to i8
%317 = getelementptr inbounds [3 x [3 x double]], [3 x [3 x double]]* %dot.1, i64 0, i64 %305, i64 %302
%318 = load double, double* %317, !alias.scope !27, !noalias !31
%319 = load double, double* bitcast ([8 x i8]* @23 to double*)
%320 = trunc i8 %316 to i1
%321 = select fast i1 %320, double %318, double %319
%322 = getelementptr inbounds [1 x [3 x [3 x double]]], [1 x [3 x [3 x double]]]* %fusion.2, i64 0, i64 0, i64 %fusion.2.indvar.dim.1, i64 %fusion.2.indvar.dim.2
store double %321, double* %322, !alias.scope !55, !noalias !56
%invar.inc21 = add nuw nsw i64 %fusion.2.indvar.dim.2, 1
store i64 %invar.inc21, i64* %fusion.2.invar_address.dim.2
br label %fusion.2.loop_header.dim.2
fusion.2.loop_exit.dim.2: ; preds = %fusion.2.loop_header.dim.2
%invar.inc20 = add nuw nsw i64 %fusion.2.indvar.dim.1, 1
store i64 %invar.inc20, i64* %fusion.2.invar_address.dim.1
br label %fusion.2.loop_header.dim.1
fusion.2.loop_exit.dim.1: ; preds = %fusion.2.loop_header.dim.1
%invar.inc19 = add nuw nsw i64 %fusion.2.indvar.dim.0, 1
store i64 %invar.inc19, i64* %fusion.2.invar_address.dim.0
br label %fusion.2.loop_header.dim.0
fusion.2.loop_exit.dim.0: ; preds = %fusion.2.loop_header.dim.0
%323 = getelementptr inbounds i8*, i8** %buffer_table, i64 22
%324 = load i8*, i8** %323, !invariant.load !20, !dereferenceable !40, !align !36
%325 = getelementptr inbounds i8, i8* %324, i64 80
%reduce.185 = bitcast i8* %325 to [1 x [3 x double]]*
store i64 0, i64* %reduce.185.invar_address.dim.0
br label %reduce.185.loop_header.dim.0
reduce.185.loop_header.dim.0: ; preds = %fusion.2.vectorized_inner.loop_exit.reduction_dim.1, %fusion.2.loop_exit.dim.0
%reduce.185.indvar.dim.0 = load i64, i64* %reduce.185.invar_address.dim.0
%326 = icmp uge i64 %reduce.185.indvar.dim.0, 1
br i1 %326, label %reduce.185.loop_exit.dim.0, label %reduce.185.loop_body.dim.0
reduce.185.loop_body.dim.0: ; preds = %reduce.185.loop_header.dim.0
%327 = load double, double* bitcast ([8 x i8]* @0 to double*)
store double %327, double* %accumulator23, align 8
%.splatinsert25 = insertelement <2 x double> undef, double %327, i32 0
%.splat26 = shufflevector <2 x double> %.splatinsert25, <2 x double> undef, <2 x i32> zeroinitializer
store <2 x double> %.splat26, <2 x double>* %accumulator24, align 8
store i64 0, i64* %fusion.2.vectorized_inner.invar_address.reduction_dim.1
br label %fusion.2.vectorized_inner.loop_header.reduction_dim.1
fusion.2.vectorized_inner.loop_header.reduction_dim.1: ; preds = %fusion.2.vectorized_inner.loop_body.reduction_dim.1, %reduce.185.loop_body.dim.0
%fusion.2.vectorized_inner.indvar.reduction_dim.1 = load i64, i64* %fusion.2.vectorized_inner.invar_address.reduction_dim.1
%328 = icmp uge i64 %fusion.2.vectorized_inner.indvar.reduction_dim.1, 3
br i1 %328, label %fusion.2.vectorized_inner.loop_exit.reduction_dim.1, label %fusion.2.vectorized_inner.loop_body.reduction_dim.1
fusion.2.vectorized_inner.loop_body.reduction_dim.1: ; preds = %fusion.2.vectorized_inner.loop_header.reduction_dim.1
%329 = getelementptr inbounds [1 x [3 x [3 x double]]], [1 x [3 x [3 x double]]]* %fusion.2, i64 0, i64 0, i64 %fusion.2.vectorized_inner.indvar.reduction_dim.1, i64 0
%330 = bitcast double* %329 to i8*
%331 = bitcast i8* %330 to double*
%332 = load double, double* %accumulator23, align 8
%333 = load double, double* %331, align 8, !alias.scope !55, !noalias !56
%334 = fadd fast double %332, %333
store double %334, double* %accumulator23, align 8
%335 = getelementptr inbounds double, double* %331, i32 1
%336 = bitcast double* %335 to <2 x double>*
%337 = load <2 x double>, <2 x double>* %accumulator24, align 8
%338 = load <2 x double>, <2 x double>* %336, align 8, !alias.scope !55, !noalias !56
%339 = fadd fast <2 x double> %337, %338
store <2 x double> %339, <2 x double>* %accumulator24, align 8
%invar.inc27 = add nuw nsw i64 %fusion.2.vectorized_inner.indvar.reduction_dim.1, 1
store i64 %invar.inc27, i64* %fusion.2.vectorized_inner.invar_address.reduction_dim.1
br label %fusion.2.vectorized_inner.loop_header.reduction_dim.1
fusion.2.vectorized_inner.loop_exit.reduction_dim.1: ; preds = %fusion.2.vectorized_inner.loop_header.reduction_dim.1
%340 = load double, double* %accumulator23, align 8
%341 = load <2 x double>, <2 x double>* %accumulator24, align 8
%342 = getelementptr inbounds [1 x [3 x double]], [1 x [3 x double]]* %reduce.185, i64 0, i64 0, i64 0
store double %340, double* %342, align 8, !alias.scope !57, !noalias !58
%343 = getelementptr inbounds double, double* %342, i32 1
%344 = bitcast double* %343 to <2 x double>*
store <2 x double> %341, <2 x double>* %344, align 8, !alias.scope !57, !noalias !58
%invar.inc22 = add nuw nsw i64 %reduce.185.indvar.dim.0, 1
store i64 %invar.inc22, i64* %reduce.185.invar_address.dim.0
br label %reduce.185.loop_header.dim.0
reduce.185.loop_exit.dim.0: ; preds = %reduce.185.loop_header.dim.0
%345 = getelementptr inbounds i8*, i8** %buffer_table, i64 0
%346 = load i8*, i8** %345, !invariant.load !20, !dereferenceable !24, !align !0
%fusion.4 = bitcast i8* %346 to [1 x [2 x [2 x double]]]*
store i64 0, i64* %fusion.4.invar_address.dim.0
br label %fusion.4.loop_header.dim.0
fusion.4.loop_header.dim.0: ; preds = %fusion.4.loop_exit.dim.1, %reduce.185.loop_exit.dim.0
%fusion.4.indvar.dim.0 = load i64, i64* %fusion.4.invar_address.dim.0
%347 = icmp uge i64 %fusion.4.indvar.dim.0, 1
br i1 %347, label %fusion.4.loop_exit.dim.0, label %fusion.4.loop_body.dim.0
fusion.4.loop_body.dim.0: ; preds = %fusion.4.loop_header.dim.0
store i64 0, i64* %fusion.4.invar_address.dim.1
br label %fusion.4.loop_header.dim.1
fusion.4.loop_header.dim.1: ; preds = %fusion.4.loop_exit.dim.2, %fusion.4.loop_body.dim.0
%fusion.4.indvar.dim.1 = load i64, i64* %fusion.4.invar_address.dim.1
%348 = icmp uge i64 %fusion.4.indvar.dim.1, 2
br i1 %348, label %fusion.4.loop_exit.dim.1, label %fusion.4.loop_body.dim.1
fusion.4.loop_body.dim.1: ; preds = %fusion.4.loop_header.dim.1
store i64 0, i64* %fusion.4.invar_address.dim.2
br label %fusion.4.loop_header.dim.2
fusion.4.loop_header.dim.2: ; preds = %fusion.4.loop_body.dim.2, %fusion.4.loop_body.dim.1
%fusion.4.indvar.dim.2 = load i64, i64* %fusion.4.invar_address.dim.2
%349 = icmp uge i64 %fusion.4.indvar.dim.2, 2
br i1 %349, label %fusion.4.loop_exit.dim.2, label %fusion.4.loop_body.dim.2
fusion.4.loop_body.dim.2: ; preds = %fusion.4.loop_header.dim.2
%350 = mul nuw nsw i64 %fusion.4.indvar.dim.2, 1
%351 = add nuw nsw i64 0, %350
%352 = udiv i64 %351, 2
%353 = mul nuw nsw i64 %fusion.4.indvar.dim.1, 1
%354 = add nuw nsw i64 0, %353
%355 = udiv i64 %354, 2
%356 = mul nuw nsw i64 %fusion.4.indvar.dim.0, 1
%357 = add nuw nsw i64 0, %356
%358 = mul nuw nsw i64 %354, 1
%359 = add nuw nsw i64 0, %358
%360 = trunc i64 %359 to i32
%361 = mul nuw nsw i64 %351, 1
%362 = add nuw nsw i64 0, %361
%363 = trunc i64 %362 to i32
%364 = icmp eq i32 %360, %363
%365 = zext i1 %364 to i8
%366 = getelementptr inbounds [2 x [2 x double]], [2 x [2 x double]]* %dot, i64 0, i64 %354, i64 %351
%367 = load double, double* %366, !alias.scope !39, !noalias !42
%368 = load double, double* bitcast ([8 x i8]* @24 to double*)
%369 = trunc i8 %365 to i1
%370 = select fast i1 %369, double %367, double %368
%371 = getelementptr inbounds [1 x [2 x [2 x double]]], [1 x [2 x [2 x double]]]* %fusion.4, i64 0, i64 0, i64 %fusion.4.indvar.dim.1, i64 %fusion.4.indvar.dim.2
store double %370, double* %371, !alias.scope !59, !noalias !60
%invar.inc30 = add nuw nsw i64 %fusion.4.indvar.dim.2, 1
store i64 %invar.inc30, i64* %fusion.4.invar_address.dim.2
br label %fusion.4.loop_header.dim.2
fusion.4.loop_exit.dim.2: ; preds = %fusion.4.loop_header.dim.2
%invar.inc29 = add nuw nsw i64 %fusion.4.indvar.dim.1, 1
store i64 %invar.inc29, i64* %fusion.4.invar_address.dim.1
br label %fusion.4.loop_header.dim.1
fusion.4.loop_exit.dim.1: ; preds = %fusion.4.loop_header.dim.1
%invar.inc28 = add nuw nsw i64 %fusion.4.indvar.dim.0, 1
store i64 %invar.inc28, i64* %fusion.4.invar_address.dim.0
br label %fusion.4.loop_header.dim.0
fusion.4.loop_exit.dim.0: ; preds = %fusion.4.loop_header.dim.0
%372 = getelementptr inbounds i8*, i8** %buffer_table, i64 22
%373 = load i8*, i8** %372, !invariant.load !20, !dereferenceable !40, !align !36
%reduce.102 = bitcast i8* %373 to [1 x [2 x double]]*
store i64 0, i64* %reduce.102.invar_address.dim.0
br label %reduce.102.loop_header.dim.0
reduce.102.loop_header.dim.0: ; preds = %fusion.4.vectorized_inner.loop_exit.reduction_dim.1, %fusion.4.loop_exit.dim.0
%reduce.102.indvar.dim.0 = load i64, i64* %reduce.102.invar_address.dim.0
%374 = icmp uge i64 %reduce.102.indvar.dim.0, 1
br i1 %374, label %reduce.102.loop_exit.dim.0, label %reduce.102.loop_body.dim.0
reduce.102.loop_body.dim.0: ; preds = %reduce.102.loop_header.dim.0
%375 = load double, double* bitcast ([8 x i8]* @0 to double*)
%.splatinsert33 = insertelement <2 x double> undef, double %375, i32 0
%.splat34 = shufflevector <2 x double> %.splatinsert33, <2 x double> undef, <2 x i32> zeroinitializer
store <2 x double> %.splat34, <2 x double>* %accumulator32, align 8
store i64 0, i64* %fusion.4.vectorized_inner.invar_address.reduction_dim.1
br label %fusion.4.vectorized_inner.loop_header.reduction_dim.1
fusion.4.vectorized_inner.loop_header.reduction_dim.1: ; preds = %fusion.4.vectorized_inner.loop_body.reduction_dim.1, %reduce.102.loop_body.dim.0
%fusion.4.vectorized_inner.indvar.reduction_dim.1 = load i64, i64* %fusion.4.vectorized_inner.invar_address.reduction_dim.1
%376 = icmp uge i64 %fusion.4.vectorized_inner.indvar.reduction_dim.1, 2
br i1 %376, label %fusion.4.vectorized_inner.loop_exit.reduction_dim.1, label %fusion.4.vectorized_inner.loop_body.reduction_dim.1
fusion.4.vectorized_inner.loop_body.reduction_dim.1: ; preds = %fusion.4.vectorized_inner.loop_header.reduction_dim.1
%377 = getelementptr inbounds [1 x [2 x [2 x double]]], [1 x [2 x [2 x double]]]* %fusion.4, i64 0, i64 0, i64 %fusion.4.vectorized_inner.indvar.reduction_dim.1, i64 0
%378 = bitcast double* %377 to i8*
%379 = bitcast i8* %378 to <2 x double>*
%380 = load <2 x double>, <2 x double>* %accumulator32, align 8
%381 = load <2 x double>, <2 x double>* %379, align 8, !alias.scope !59, !noalias !60
%382 = fadd fast <2 x double> %380, %381
store <2 x double> %382, <2 x double>* %accumulator32, align 8
%invar.inc35 = add nuw nsw i64 %fusion.4.vectorized_inner.indvar.reduction_dim.1, 1
store i64 %invar.inc35, i64* %fusion.4.vectorized_inner.invar_address.reduction_dim.1
br label %fusion.4.vectorized_inner.loop_header.reduction_dim.1
fusion.4.vectorized_inner.loop_exit.reduction_dim.1: ; preds = %fusion.4.vectorized_inner.loop_header.reduction_dim.1
%383 = load <2 x double>, <2 x double>* %accumulator32, align 8
%384 = getelementptr inbounds [1 x [2 x double]], [1 x [2 x double]]* %reduce.102, i64 0, i64 0, i64 0
%385 = bitcast double* %384 to <2 x double>*
store <2 x double> %383, <2 x double>* %385, align 8, !alias.scope !61, !noalias !62
%invar.inc31 = add nuw nsw i64 %reduce.102.indvar.dim.0, 1
store i64 %invar.inc31, i64* %reduce.102.invar_address.dim.0
br label %reduce.102.loop_header.dim.0
reduce.102.loop_exit.dim.0: ; preds = %reduce.102.loop_header.dim.0
%386 = getelementptr inbounds i8*, i8** %buffer_table, i64 0
%387 = load i8*, i8** %386, !invariant.load !20, !dereferenceable !24, !align !0
%fusion.1 = bitcast i8* %387 to [3 x [6 x double]]*
store i64 0, i64* %fusion.1.invar_address.dim.0
br label %fusion.1.loop_header.dim.0
fusion.1.loop_header.dim.0: ; preds = %fusion.1.loop_exit.dim.1, %reduce.102.loop_exit.dim.0
%fusion.1.indvar.dim.0 = load i64, i64* %fusion.1.invar_address.dim.0
%388 = icmp uge i64 %fusion.1.indvar.dim.0, 3
br i1 %388, label %fusion.1.loop_exit.dim.0, label %fusion.1.loop_body.dim.0
fusion.1.loop_body.dim.0: ; preds = %fusion.1.loop_header.dim.0
store i64 0, i64* %fusion.1.invar_address.dim.1
br label %fusion.1.loop_header.dim.1
fusion.1.loop_header.dim.1: ; preds = %fusion.1.loop_body.dim.1, %fusion.1.loop_body.dim.0
%fusion.1.indvar.dim.1 = load i64, i64* %fusion.1.invar_address.dim.1
%389 = icmp uge i64 %fusion.1.indvar.dim.1, 6
br i1 %389, label %fusion.1.loop_exit.dim.1, label %fusion.1.loop_body.dim.1
fusion.1.loop_body.dim.1: ; preds = %fusion.1.loop_header.dim.1
%390 = mul nuw nsw i64 %fusion.1.indvar.dim.1, 1
%391 = add nuw nsw i64 0, %390
%392 = urem i64 %391, 3
%393 = udiv i64 %391, 3
%394 = udiv i64 %393, 2
%395 = mul nuw nsw i64 %fusion.1.indvar.dim.0, 1
%396 = add nuw nsw i64 0, %395
%397 = udiv i64 %396, 3
%398 = mul nuw nsw i64 %393, 1
%399 = add nuw nsw i64 0, %398
%400 = udiv i64 %399, 2
%401 = mul nuw nsw i64 %396, 1
%402 = add nuw nsw i64 0, %401
%403 = udiv i64 %402, 3
%404 = mul nuw nsw i64 %402, 1
%405 = add nuw nsw i64 0, %404
%406 = udiv i64 %405, 3
%407 = load float, float* bitcast ([4 x i8]* @25 to float*)
%408 = getelementptr inbounds [3 x [1 x float]], [3 x [1 x float]]* %arg2.3, i64 0, i64 %405, i64 0
%409 = load float, float* %408, !invariant.load !20, !noalias !44
%410 = load float, float* bitcast ([4 x i8]* @26 to float*)
%411 = fmul fast float %409, %410
%412 = fadd fast float %407, %411
%413 = fcmp fast une float %412, %412
%414 = zext i1 %413 to i8
%415 = load float, float* bitcast ([4 x i8]* @27 to float*)
%416 = fcmp fast one float %412, 0.000000e+00
%417 = uitofp i1 %416 to float
%418 = call fast float @llvm.copysign.f32(float %417, float %412)
%419 = fcmp fast uno float %412, %412
%420 = select fast i1 %419, float %412, float %418
%421 = trunc i8 %414 to i1
%422 = select fast i1 %421, float %415, float %420
%423 = fpext float %422 to double
%424 = load double, double* bitcast ([8 x i8]* @28 to double*)
%425 = getelementptr inbounds [3 x [1 x double]], [3 x [1 x double]]* %arg4.5, i64 0, i64 %405, i64 0
%426 = load double, double* %425, !invariant.load !20, !noalias !44
%427 = load double, double* bitcast ([8 x i8]* @29 to double*)
%428 = fmul fast double %426, %427
%429 = fadd fast double %424, %428
%430 = fmul fast double %423, %429
%431 = fmul fast double %430, %430
%432 = mul nuw nsw i64 %399, 1
%433 = add nuw nsw i64 0, %432
%434 = udiv i64 %433, 2
%435 = getelementptr inbounds [1 x [2 x double]], [1 x [2 x double]]* %reduce.102, i64 0, i64 0, i64 %433
%436 = load double, double* %435, !alias.scope !61, !noalias !62
%437 = fmul fast double %431, %436
%438 = mul nuw nsw i64 %392, 1
%439 = add nuw nsw i64 0, %438
%440 = udiv i64 %439, 3
%441 = getelementptr inbounds [1 x [3 x double]], [1 x [3 x double]]* %reduce.185, i64 0, i64 0, i64 %439
%442 = load double, double* %441, !alias.scope !57, !noalias !58
%443 = fmul fast double %437, %442
%444 = getelementptr inbounds [3 x [6 x double]], [3 x [6 x double]]* %fusion.1, i64 0, i64 %fusion.1.indvar.dim.0, i64 %fusion.1.indvar.dim.1
store double %443, double* %444, !alias.scope !63, !noalias !64
%invar.inc37 = add nuw nsw i64 %fusion.1.indvar.dim.1, 1
store i64 %invar.inc37, i64* %fusion.1.invar_address.dim.1
br label %fusion.1.loop_header.dim.1
fusion.1.loop_exit.dim.1: ; preds = %fusion.1.loop_header.dim.1
%invar.inc36 = add nuw nsw i64 %fusion.1.indvar.dim.0, 1
store i64 %invar.inc36, i64* %fusion.1.invar_address.dim.0
br label %fusion.1.loop_header.dim.0
fusion.1.loop_exit.dim.0: ; preds = %fusion.1.loop_header.dim.0
%445 = getelementptr inbounds i8*, i8** %buffer_table, i64 7
%446 = load i8*, i8** %445, !invariant.load !20, !dereferenceable !36, !align !0
%tuple.325 = bitcast i8* %446 to [2 x i8*]*
%447 = bitcast [3 x [6 x double]]* %fusion.1 to i8*
%448 = getelementptr inbounds [2 x i8*], [2 x i8*]* %tuple.325, i64 0, i64 0
store i8* %447, i8** %448, !alias.scope !65, !noalias !66
%449 = bitcast [3 x [6 x double]]* %reduce.322 to i8*
%450 = getelementptr inbounds [2 x i8*], [2 x i8*]* %tuple.325, i64 0, i64 1
store i8* %449, i8** %450, !alias.scope !65, !noalias !66
ret void
}
; External declaration of the llvm.copysign.f32 intrinsic: two float operands,
; float result. The only call in the visible part of this module is in
; fusion.1.loop_body.dim.1, where the first operand is a 0.0/1.0 value produced
; by `uitofp i1` and the second operand is %412, the value whose sign is taken.
; Function Attrs: nounwind readnone speculatable willreturn
declare float @llvm.copysign.f32(float, float) #1
; Attribute group #0 is not referenced by any visible line of this excerpt;
; presumably it is attached to the function definition whose header lies
; above the excerpt -- TODO confirm against the `define` line.
attributes #0 = { uwtable "denormal-fp-math"="preserve-sign" "no-frame-pointer-elim"="false" }
; Attribute group #1 is the group named on the @llvm.copysign.f32 declaration.
attributes #1 = { nounwind readnone speculatable willreturn }
; ---------------------------------------------------------------------------
; Module metadata table.
;
; Three kinds of node appear below:
;   * i64 constant nodes, used as operands of !align / !dereferenceable
;     attachments on the loads from %buffer_table;
;   * "buffer: {index, offset, size}" string nodes, each paired with !3, and
;     the lists built from them, used as !alias.scope / !noalias operands;
;   * two distinct self-referential nodes (!29, !41) carrying !30.
; ---------------------------------------------------------------------------

; i64 8: the !align operand on the loads of buffer_table entries 0 and 7.
!0 = !{i64 8}
; !1-!19: one-element lists, each wrapping an 8-byte buffer node (indices
; 13-21). None of them is referenced in the visible tail of the function.
!1 = !{!2}
!2 = !{!"buffer: {index:20, offset:0, size:8}", !3}
; !3 is the second operand of every "buffer: ..." node in this table.
!3 = !{!"XLA global AA domain"}
!4 = !{!5}
!5 = !{!"buffer: {index:13, offset:0, size:8}", !3}
!6 = !{!7}
!7 = !{!"buffer: {index:21, offset:0, size:8}", !3}
!8 = !{!9}
!9 = !{!"buffer: {index:17, offset:0, size:8}", !3}
!10 = !{!11}
!11 = !{!"buffer: {index:19, offset:0, size:8}", !3}
!12 = !{!13}
!13 = !{!"buffer: {index:18, offset:0, size:8}", !3}
!14 = !{!15}
!15 = !{!"buffer: {index:14, offset:0, size:8}", !3}
!16 = !{!17}
!17 = !{!"buffer: {index:16, offset:0, size:8}", !3}
!18 = !{!19}
!19 = !{!"buffer: {index:15, offset:0, size:8}", !3}
; Empty node: the operand of every !invariant.load attachment in the function.
!20 = !{}
; i64 constants. !24 (144) is the !dereferenceable operand on the loads of
; buffer_table entry 0; !21-!23 have no use in the visible part of the function
; (presumably the same role for earlier loads -- TODO confirm).
!21 = !{i64 24}
!22 = !{i64 12}
!23 = !{i64 72}
!24 = !{i64 144}
; !25 is not referenced in the visible part of the function.
!25 = !{!26}
!26 = !{!"buffer: {index:1, offset:0, size:72}", !3}
; !27 / !31: !alias.scope / !noalias pair on the load from %dot.1.
!27 = !{!28}
!28 = !{!"buffer: {index:0, offset:0, size:72}", !3}
; !29 and !41 are distinct, self-referential nodes whose second operand is
; !30 ("llvm.loop.unroll.disable"). No branch in the visible part of the
; function carries them.
!29 = distinct !{!29, !30}
!30 = !{!"llvm.loop.unroll.disable"}
!31 = !{!26, !32, !33, !34}
; Three ranges of buffer index 22: [0,72), [0,864) and [864,896).
!32 = !{!"buffer: {index:22, offset:0, size:72}", !3}
!33 = !{!"buffer: {index:22, offset:0, size:864}", !3}
!34 = !{!"buffer: {index:22, offset:864, size:32}", !3}
; i64 constants. !36 (16) is the !align operand on the loads of buffer_table
; entry 22 and the !dereferenceable operand on the load of entry 7. !35 has no
; use in the visible part of the function.
!35 = !{i64 32}
!36 = !{i64 16}
; !37 is not referenced in the visible part of the function.
!37 = !{!38}
!38 = !{!"buffer: {index:1, offset:0, size:32}", !3}
; !39 / !42: !alias.scope / !noalias pair on the load from %dot.
!39 = !{!34}
; i64 896: the !dereferenceable operand on the loads of buffer_table entry 22
; (equal to 864 + 32, the end of the range named by !34).
!40 = !{i64 896}
!41 = distinct !{!41, !30}
!42 = !{!43, !28, !38, !33}
!43 = !{!"buffer: {index:0, offset:0, size:32}", !3}
; !44: !noalias list on the invariant loads from %arg2.3 and %arg4.5.
!44 = !{!28, !45, !46, !33, !47, !34}
!45 = !{!"buffer: {index:0, offset:0, size:144}", !3}
!46 = !{!"buffer: {index:22, offset:0, size:16}", !3}
; Offset 80 matches the `getelementptr i8, ..., i64 80` that forms %reduce.185.
!47 = !{!"buffer: {index:22, offset:80, size:24}", !3}
; !48 / !49: pair on the vector loads from %fusion in the reduce.322 reduction.
!48 = !{!33}
!49 = !{!28, !50, !51, !34}
!50 = !{!"buffer: {index:1, offset:0, size:144}", !3}
!51 = !{!"buffer: {index:11, offset:0, size:8}", !3}
; !52 / !53: pair on the vector stores into %reduce.322.
!52 = !{!50}
!53 = !{!45, !54, !51, !33}
!54 = !{!"buffer: {index:7, offset:0, size:16}", !3}
; !55 / !56: pair on the store into %fusion.2 and on the loads that read it
; back in the reduce.185 reduction loop.
!55 = !{!32}
!56 = !{!28, !51, !47}
; !57 / !58: pair on the stores into %reduce.185 and on the load from it in
; the fusion.1 loop body.
!57 = !{!47}
!58 = !{!45, !51, !46, !32}
; !59 / !60: pair on the store into %fusion.4 and on the vector load that reads
; it back in the reduce.102 reduction loop.
!59 = !{!43}
!60 = !{!51, !46, !34}
; !61 / !62: pair on the store into %reduce.102 and on the load from it in the
; fusion.1 loop body.
!61 = !{!46}
!62 = !{!43, !45, !51, !47}
; !63 / !64: pair on the store into %fusion.1.
!63 = !{!45}
!64 = !{!50, !54, !46, !47}
; !65 / !66: pair on the two pointer stores into %tuple.325.
!65 = !{!54}
!66 = !{!45, !50}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment