Skip to content

Instantly share code, notes, and snippets.

@ArchRobison
Created March 9, 2016 22:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ArchRobison/f913f6b6a9c5743ff17e to your computer and use it in GitHub Desktop.
Save ArchRobison/f913f6b6a9c5743ff17e to your computer and use it in GitHub Desktop.
Example Program
# matmul!(C, A, B, Val{N})
#
# Accumulate a matrix product into C in place: for every i, j, k in 1:N it
# performs C[k, j] += A[k, i] * B[i, j]. C is not cleared first, so the result
# is added to whatever C already holds.
#
# The extent N is not a runtime argument; it is taken from the type parameter
# of the fourth argument, which must be the type Val{N} itself.
# There is no explicit return statement; the function is called for its effect on C.
# @noinline asks the compiler to keep this as a separate function.
@noinline function matmul!{N}(C, A, B, ::Type{Val{N}})
# @inbounds disables bounds checking for the indexing below, so the caller is
# responsible for every index in 1:N being valid for C, A and B.
@inbounds for i in 1:N, j in 1:N
# Innermost loop runs over k, the first index of C and A; B[i, j] does not
# depend on k. @simd marks this loop as a candidate for vectorization.
@simd for k in 1:N
C[k, j] += A[k, i] * B[i, j]
end
end
end
# Driver: build a 1-by-1 Float64 matrix and print the LLVM IR generated for
# matmul! with these argument types and N = n.
n = 1
A = rand(Float64, n, n)
# The same array is passed as C, A and B. @code_llvm is used here to display
# the generated code for this call signature.
@code_llvm matmul!(A, A, A, Val{n})
$ julia matmul.jl
; Compiler output for the matmul! call above. The four %jl_value_t* parameters
; (%0..%3) presumably correspond to C, A, B and the Val{N} type, in that order.
define %jl_value_t* @"julia_matmul!_23350.1"(%jl_value_t*, %jl_value_t*, %jl_value_t*, %jl_value_t*) #0 {
top:
; The loop bound is loaded from a fixed absolute address instead of appearing
; as an immediate constant. The same address is re-loaded in %L and %L1 below.
; NOTE(review): this value is presumably the type parameter N -- confirm.
%4 = load i64, i64* inttoptr (i64 140503009370240 to i64*), align 128
%5 = icmp slt i64 %4, 1
br i1 %5, label %L10, label %L.preheader
; For each of %0, %1, %2: load the pointer stored at the start of the object
; (used below as the base of the double data) and an i64 from element 3
; (used below as a multiplier on the second index, i.e. a column stride).
L.preheader: ; preds = %top
%6 = bitcast %jl_value_t* %0 to double**
%7 = load double*, double** %6, align 8
%8 = getelementptr inbounds %jl_value_t, %jl_value_t* %0, i64 3, i32 0
%9 = bitcast %jl_value_t** %8 to i64*
%10 = load i64, i64* %9, align 8
%11 = bitcast %jl_value_t* %1 to double**
%12 = load double*, double** %11, align 8
%13 = getelementptr inbounds %jl_value_t, %jl_value_t* %1, i64 3, i32 0
%14 = bitcast %jl_value_t** %13 to i64*
%15 = load i64, i64* %14, align 8
%16 = bitcast %jl_value_t* %2 to double**
%17 = load double*, double** %16, align 8
%18 = getelementptr inbounds %jl_value_t, %jl_value_t* %2, i64 3, i32 0
%19 = bitcast %jl_value_t** %18 to i64*
%20 = load i64, i64* %19, align 8
br label %L
; Outermost loop header; %"#s3.0" counts from 1 and is compared against %4 in %L9.
; It scales the stride of %1 (%24), matching index i in A[k, i].
L: ; preds = %L.preheader, %L9
%"#s3.0" = phi i64 [ %47, %L9 ], [ 1, %L.preheader ]
; Second load of the loop bound from the same fixed address.
%21 = load i64, i64* inttoptr (i64 140503009370240 to i64*), align 128
%22 = icmp slt i64 %21, 1
br i1 %22, label %L9, label %L1.preheader
L1.preheader: ; preds = %L
%23 = add i64 %"#s3.0", -1
%24 = mul i64 %23, %15
br label %L1
; Middle loop header; %"#s1.0" counts from 1 and is compared against %21 in %L8.
; It scales the strides of %0 and %2 (%52, %53), matching index j.
L1: ; preds = %L1.preheader, %L8
%"#s1.0" = phi i64 [ %45, %L8 ], [ 1, %L1.preheader ]
; Third load of the loop bound. The innermost trip count %33 is derived from it
; as max(bound, 0) - 1 + 1 using overflow-checked arithmetic.
%25 = load i64, i64* inttoptr (i64 140503009370240 to i64*), align 128
%26 = icmp sgt i64 %25, 0
%27 = select i1 %26, i64 %25, i64 0
%28 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %27, i64 1)
%29 = extractvalue { i64, i1 } %28, 1
br i1 %29, label %fail.split, label %L1.L1.split_crit_edge
L1.L1.split_crit_edge: ; preds = %L1
%30 = extractvalue { i64, i1 } %28, 0
%31 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %30, i64 1)
%32 = extractvalue { i64, i1 } %31, 1
br i1 %32, label %L1.split.split.us, label %L1.split.L1.split.split_crit_edge
L1.split.L1.split.split_crit_edge: ; preds = %L1.L1.split_crit_edge
%33 = extractvalue { i64, i1 } %31, 0
%34 = icmp slt i64 %33, 1
br i1 %34, label %L8, label %L3.preheader
; Both targets of this branch are %fail13, so the undef condition does not matter.
L1.split.split.us: ; preds = %L1.L1.split_crit_edge
br i1 undef, label %fail13, label %fail13
; Scalar innermost loop: one double per iteration, zero-based index.
; Loads an element of %0's data and of %1's data, multiplies the latter by the
; value at %55 (in %2's data), adds, and stores back into %0's data.
; Also serves as the remainder loop after %vector.body (entered via %scalar.ph).
L3: ; preds = %L3, %scalar.ph
%"##i#7729.0" = phi i64 [ %44, %L3 ], [ %bc.trunc.resume.val, %scalar.ph ]
%35 = add i64 %"##i#7729.0", %52
%36 = getelementptr double, double* %7, i64 %35
%37 = load double, double* %36, align 8
%38 = add i64 %"##i#7729.0", %24
%39 = getelementptr double, double* %12, i64 %38
%40 = load double, double* %39, align 8
%41 = load double, double* %55, align 8
%42 = fmul double %40, %41
%43 = fadd double %37, %42
store double %43, double* %36, align 8
%44 = add nuw nsw i64 %"##i#7729.0", 1
%exitcond = icmp eq i64 %44, %33
br i1 %exitcond, label %L8.loopexit, label %L3
L8.loopexit: ; preds = %middle.block, %L3
br label %L8
; Latch of the middle loop: advance %"#s1.0", exit once it equals %21.
L8: ; preds = %L8.loopexit, %L1.split.L1.split.split_crit_edge
%45 = add i64 %"#s1.0", 1
%46 = icmp eq i64 %"#s1.0", %21
br i1 %46, label %L9.loopexit, label %L1
L9.loopexit: ; preds = %L8
br label %L9
; Latch of the outermost loop: advance %"#s3.0", exit once it equals %4.
L9: ; preds = %L9.loopexit, %L
%47 = add i64 %"#s3.0", 1
%48 = icmp eq i64 %"#s3.0", %4
br i1 %48, label %L10.loopexit, label %L
L10.loopexit: ; preds = %L9
br label %L10
; Function exit: returns a constant object pointer.
; NOTE(review): presumably Julia's `nothing` -- confirm.
L10: ; preds = %L10.loopexit, %top
ret %jl_value_t* inttoptr (i64 140503009280016 to %jl_value_t*)
; Error exits: throw the overflow exception if either checked operation in the
; trip-count computation overflowed.
fail.split: ; preds = %L1
%49 = load %jl_value_t*, %jl_value_t** @jl_overflow_exception, align 8
call void @jl_throw(%jl_value_t* %49)
unreachable
fail13: ; preds = %L1.split.split.us, %L1.split.split.us
%50 = load %jl_value_t*, %jl_value_t** @jl_overflow_exception, align 8
call void @jl_throw(%jl_value_t* %50)
unreachable
; Setup for the innermost loop: %52 is the column offset into %0's data,
; and %55 points at element (%"#s3.0" - 1) + (%"#s1.0" - 1) * %20 of %2's data,
; which stays fixed for the whole innermost loop.
L3.preheader: ; preds = %L1.split.L1.split.split_crit_edge
%51 = add i64 %"#s1.0", -1
%52 = mul i64 %51, %10
%53 = mul i64 %51, %20
%54 = add i64 %23, %53
%55 = getelementptr double, double* %17, i64 %54
%backedge.overflow = icmp eq i64 %33, 0
br i1 %backedge.overflow, label %scalar.ph, label %overflow.checked
; %n.vec is the trip count rounded down to a multiple of 8; if it is zero the
; vector loop is skipped.
overflow.checked: ; preds = %L3.preheader
%n.vec = and i64 %33, -8
%cmp.zero = icmp eq i64 %n.vec, 0
br i1 %cmp.zero, label %middle.block, label %vector.stridecheck
; Runtime guard: the vector loop is only taken when %20 equals 1.
vector.stridecheck: ; preds = %overflow.checked
%stride.chk = icmp eq i64 %20, 1
br i1 %stride.chk, label %vector.ph, label %middle.block
vector.ph: ; preds = %vector.stridecheck
br label %vector.body
; Vectorized innermost loop: 8 doubles per iteration, as two <4 x double> halves.
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%56 = add i64 %index, %52
%57 = getelementptr double, double* %7, i64 %56
%58 = bitcast double* %57 to <4 x double>*
%wide.load = load <4 x double>, <4 x double>* %58, align 8
%59 = getelementptr double, double* %57, i64 4
%60 = bitcast double* %59 to <4 x double>*
%wide.load18 = load <4 x double>, <4 x double>* %60, align 8
%61 = add i64 %index, %24
%62 = getelementptr double, double* %12, i64 %61
%63 = bitcast double* %62 to <4 x double>*
%wide.load21 = load <4 x double>, <4 x double>* %63, align 8
%64 = getelementptr double, double* %62, i64 4
%65 = bitcast double* %64 to <4 x double>*
%wide.load22 = load <4 x double>, <4 x double>* %65, align 8
; The scalar at %55 is loaded inside the loop (twice: %66 and %73) and
; broadcast into two 4-wide vectors (%70 and %75) on every iteration.
%66 = load double, double* %55, align 8
%67 = insertelement <4 x double> undef, double %66, i32 0
%68 = insertelement <4 x double> %67, double %66, i32 1
%69 = insertelement <4 x double> %68, double %66, i32 2
%70 = insertelement <4 x double> %69, double %66, i32 3
%71 = insertelement <4 x double> undef, double %66, i32 0
%72 = insertelement <4 x double> %71, double %66, i32 1
%73 = load double, double* %55, align 8
%74 = insertelement <4 x double> %72, double %73, i32 2
%75 = insertelement <4 x double> %74, double %73, i32 3
%76 = fmul <4 x double> %wide.load21, %70
%77 = fmul <4 x double> %wide.load22, %75
%78 = fadd <4 x double> %wide.load, %76
%79 = fadd <4 x double> %wide.load18, %77
%80 = bitcast double* %57 to <4 x double>*
store <4 x double> %78, <4 x double>* %80, align 8
%81 = bitcast double* %59 to <4 x double>*
store <4 x double> %79, <4 x double>* %81, align 8
%index.next = add i64 %index, 8
%82 = icmp eq i64 %index.next, %n.vec
br i1 %82, label %middle.block, label %vector.body
; After (or instead of) the vector loop: if all %33 iterations are done, leave;
; otherwise continue in the scalar loop %L3 from the resume index.
middle.block: ; preds = %vector.stridecheck, %vector.body, %overflow.checked
%resume.val = phi i64 [ 0, %overflow.checked ], [ 0, %vector.stridecheck ], [ %n.vec, %vector.body ]
%trunc.resume.val = phi i64 [ 0, %overflow.checked ], [ 0, %vector.stridecheck ], [ %n.vec, %vector.body ]
%cmp.n = icmp eq i64 %33, %resume.val
br i1 %cmp.n, label %L8.loopexit, label %scalar.ph
scalar.ph: ; preds = %middle.block, %L3.preheader
%bc.trunc.resume.val = phi i64 [ %trunc.resume.val, %middle.block ], [ 0, %L3.preheader ]
br label %L3
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment