Created
March 9, 2016 22:04
-
-
Save ArchRobison/f913f6b6a9c5743ff17e to your computer and use it in GitHub Desktop.
Example Program
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using InteractiveUtils  # supplies @code_llvm when this file is run as a script

"""
    matmul!(C, A, B, ::Type{Val{N}}) where {N}

Accumulate the product of the leading `N`×`N` blocks of `A` and `B` into `C`,
i.e. perform `C[k, j] += A[k, i] * B[i, j]` for every `i, j, k in 1:N`.

`C` is updated in place and is *not* cleared first, so its previous contents are
part of the result. The problem size comes from the `Val{N}` type argument, not
from the dimensions of the arrays.

Bounds checking is switched off with `@inbounds`; the caller must guarantee that
`C`, `A` and `B` can each be indexed at `[1:N, 1:N]`. This is not verified.

Returns `nothing`.
"""
@noinline function matmul!(C, A, B, ::Type{Val{N}}) where {N}
    @inbounds for i in 1:N, j in 1:N
        # The innermost loop runs over the first index, which is the contiguous
        # one for standard column-major `Array`s.
        @simd for k in 1:N
            C[k, j] += A[k, i] * B[i, j]
        end
    end
    return nothing
end

# Driver: print the LLVM IR generated for a 1×1 problem. `@code_llvm` only
# inspects the method selected for these argument types; it does not execute the
# call, so passing the same matrix for all three arguments is harmless here.
n = 1
A = rand(Float64, n, n)
@code_llvm matmul!(A, A, A, Val{n})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ julia matmul.jl | |
define %jl_value_t* @"julia_matmul!_23350.1"(%jl_value_t*, %jl_value_t*, %jl_value_t*, %jl_value_t*) #0 { | |
top: | |
%4 = load i64, i64* inttoptr (i64 140503009370240 to i64*), align 128 | |
%5 = icmp slt i64 %4, 1 | |
br i1 %5, label %L10, label %L.preheader | |
L.preheader: ; preds = %top | |
%6 = bitcast %jl_value_t* %0 to double** | |
%7 = load double*, double** %6, align 8 | |
%8 = getelementptr inbounds %jl_value_t, %jl_value_t* %0, i64 3, i32 0 | |
%9 = bitcast %jl_value_t** %8 to i64* | |
%10 = load i64, i64* %9, align 8 | |
%11 = bitcast %jl_value_t* %1 to double** | |
%12 = load double*, double** %11, align 8 | |
%13 = getelementptr inbounds %jl_value_t, %jl_value_t* %1, i64 3, i32 0 | |
%14 = bitcast %jl_value_t** %13 to i64* | |
%15 = load i64, i64* %14, align 8 | |
%16 = bitcast %jl_value_t* %2 to double** | |
%17 = load double*, double** %16, align 8 | |
%18 = getelementptr inbounds %jl_value_t, %jl_value_t* %2, i64 3, i32 0 | |
%19 = bitcast %jl_value_t** %18 to i64* | |
%20 = load i64, i64* %19, align 8 | |
br label %L | |
L: ; preds = %L.preheader, %L9 | |
%"#s3.0" = phi i64 [ %47, %L9 ], [ 1, %L.preheader ] | |
%21 = load i64, i64* inttoptr (i64 140503009370240 to i64*), align 128 | |
%22 = icmp slt i64 %21, 1 | |
br i1 %22, label %L9, label %L1.preheader | |
L1.preheader: ; preds = %L | |
%23 = add i64 %"#s3.0", -1 | |
%24 = mul i64 %23, %15 | |
br label %L1 | |
L1: ; preds = %L1.preheader, %L8 | |
%"#s1.0" = phi i64 [ %45, %L8 ], [ 1, %L1.preheader ] | |
%25 = load i64, i64* inttoptr (i64 140503009370240 to i64*), align 128 | |
%26 = icmp sgt i64 %25, 0 | |
%27 = select i1 %26, i64 %25, i64 0 | |
%28 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %27, i64 1) | |
%29 = extractvalue { i64, i1 } %28, 1 | |
br i1 %29, label %fail.split, label %L1.L1.split_crit_edge | |
L1.L1.split_crit_edge: ; preds = %L1 | |
%30 = extractvalue { i64, i1 } %28, 0 | |
%31 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %30, i64 1) | |
%32 = extractvalue { i64, i1 } %31, 1 | |
br i1 %32, label %L1.split.split.us, label %L1.split.L1.split.split_crit_edge | |
L1.split.L1.split.split_crit_edge: ; preds = %L1.L1.split_crit_edge | |
%33 = extractvalue { i64, i1 } %31, 0 | |
%34 = icmp slt i64 %33, 1 | |
br i1 %34, label %L8, label %L3.preheader | |
L1.split.split.us: ; preds = %L1.L1.split_crit_edge | |
br i1 undef, label %fail13, label %fail13 | |
L3: ; preds = %L3, %scalar.ph | |
%"##i#7729.0" = phi i64 [ %44, %L3 ], [ %bc.trunc.resume.val, %scalar.ph ] | |
%35 = add i64 %"##i#7729.0", %52 | |
%36 = getelementptr double, double* %7, i64 %35 | |
%37 = load double, double* %36, align 8 | |
%38 = add i64 %"##i#7729.0", %24 | |
%39 = getelementptr double, double* %12, i64 %38 | |
%40 = load double, double* %39, align 8 | |
%41 = load double, double* %55, align 8 | |
%42 = fmul double %40, %41 | |
%43 = fadd double %37, %42 | |
store double %43, double* %36, align 8 | |
%44 = add nuw nsw i64 %"##i#7729.0", 1 | |
%exitcond = icmp eq i64 %44, %33 | |
br i1 %exitcond, label %L8.loopexit, label %L3 | |
L8.loopexit: ; preds = %middle.block, %L3 | |
br label %L8 | |
L8: ; preds = %L8.loopexit, %L1.split.L1.split.split_crit_edge | |
%45 = add i64 %"#s1.0", 1 | |
%46 = icmp eq i64 %"#s1.0", %21 | |
br i1 %46, label %L9.loopexit, label %L1 | |
L9.loopexit: ; preds = %L8 | |
br label %L9 | |
L9: ; preds = %L9.loopexit, %L | |
%47 = add i64 %"#s3.0", 1 | |
%48 = icmp eq i64 %"#s3.0", %4 | |
br i1 %48, label %L10.loopexit, label %L | |
L10.loopexit: ; preds = %L9 | |
br label %L10 | |
L10: ; preds = %L10.loopexit, %top | |
ret %jl_value_t* inttoptr (i64 140503009280016 to %jl_value_t*) | |
fail.split: ; preds = %L1 | |
%49 = load %jl_value_t*, %jl_value_t** @jl_overflow_exception, align 8 | |
call void @jl_throw(%jl_value_t* %49) | |
unreachable | |
fail13: ; preds = %L1.split.split.us, %L1.split.split.us | |
%50 = load %jl_value_t*, %jl_value_t** @jl_overflow_exception, align 8 | |
call void @jl_throw(%jl_value_t* %50) | |
unreachable | |
L3.preheader: ; preds = %L1.split.L1.split.split_crit_edge | |
%51 = add i64 %"#s1.0", -1 | |
%52 = mul i64 %51, %10 | |
%53 = mul i64 %51, %20 | |
%54 = add i64 %23, %53 | |
%55 = getelementptr double, double* %17, i64 %54 | |
%backedge.overflow = icmp eq i64 %33, 0 | |
br i1 %backedge.overflow, label %scalar.ph, label %overflow.checked | |
overflow.checked: ; preds = %L3.preheader | |
%n.vec = and i64 %33, -8 | |
%cmp.zero = icmp eq i64 %n.vec, 0 | |
br i1 %cmp.zero, label %middle.block, label %vector.stridecheck | |
vector.stridecheck: ; preds = %overflow.checked | |
%stride.chk = icmp eq i64 %20, 1 | |
br i1 %stride.chk, label %vector.ph, label %middle.block | |
vector.ph: ; preds = %vector.stridecheck | |
br label %vector.body | |
vector.body: ; preds = %vector.body, %vector.ph | |
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | |
%56 = add i64 %index, %52 | |
%57 = getelementptr double, double* %7, i64 %56 | |
%58 = bitcast double* %57 to <4 x double>* | |
%wide.load = load <4 x double>, <4 x double>* %58, align 8 | |
%59 = getelementptr double, double* %57, i64 4 | |
%60 = bitcast double* %59 to <4 x double>* | |
%wide.load18 = load <4 x double>, <4 x double>* %60, align 8 | |
%61 = add i64 %index, %24 | |
%62 = getelementptr double, double* %12, i64 %61 | |
%63 = bitcast double* %62 to <4 x double>* | |
%wide.load21 = load <4 x double>, <4 x double>* %63, align 8 | |
%64 = getelementptr double, double* %62, i64 4 | |
%65 = bitcast double* %64 to <4 x double>* | |
%wide.load22 = load <4 x double>, <4 x double>* %65, align 8 | |
%66 = load double, double* %55, align 8 | |
%67 = insertelement <4 x double> undef, double %66, i32 0 | |
%68 = insertelement <4 x double> %67, double %66, i32 1 | |
%69 = insertelement <4 x double> %68, double %66, i32 2 | |
%70 = insertelement <4 x double> %69, double %66, i32 3 | |
%71 = insertelement <4 x double> undef, double %66, i32 0 | |
%72 = insertelement <4 x double> %71, double %66, i32 1 | |
%73 = load double, double* %55, align 8 | |
%74 = insertelement <4 x double> %72, double %73, i32 2 | |
%75 = insertelement <4 x double> %74, double %73, i32 3 | |
%76 = fmul <4 x double> %wide.load21, %70 | |
%77 = fmul <4 x double> %wide.load22, %75 | |
%78 = fadd <4 x double> %wide.load, %76 | |
%79 = fadd <4 x double> %wide.load18, %77 | |
%80 = bitcast double* %57 to <4 x double>* | |
store <4 x double> %78, <4 x double>* %80, align 8 | |
%81 = bitcast double* %59 to <4 x double>* | |
store <4 x double> %79, <4 x double>* %81, align 8 | |
%index.next = add i64 %index, 8 | |
%82 = icmp eq i64 %index.next, %n.vec | |
br i1 %82, label %middle.block, label %vector.body | |
middle.block: ; preds = %vector.stridecheck, %vector.body, %overflow.checked | |
%resume.val = phi i64 [ 0, %overflow.checked ], [ 0, %vector.stridecheck ], [ %n.vec, %vector.body ] | |
%trunc.resume.val = phi i64 [ 0, %overflow.checked ], [ 0, %vector.stridecheck ], [ %n.vec, %vector.body ] | |
%cmp.n = icmp eq i64 %33, %resume.val | |
br i1 %cmp.n, label %L8.loopexit, label %scalar.ph | |
scalar.ph: ; preds = %middle.block, %L3.preheader | |
%bc.trunc.resume.val = phi i64 [ %trunc.resume.val, %middle.block ], [ 0, %L3.preheader ] | |
br label %L3 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment