Skip to content

Instantly share code, notes, and snippets.

@ArchRobison
Created March 27, 2014 14:38
Show Gist options
  • Save ArchRobison/9808976 to your computer and use it in GitHub Desktop.
Save ArchRobison/9808976 to your computer and use it in GitHub Desktop.
Example using Julia and LLVM trunk. SLPVectorizer appears to have generated superfluous extracts.
$ cat vadd.jl
# Element-wise sum of two 4-tuples of Float64.
# Returns a new 4-tuple whose k-th entry is a[k] + b[k].
function vadd(a::NTuple{4,Float64}, b::NTuple{4,Float64})
    a1, a2, a3, a4 = a
    b1, b2, b3, b4 = b
    return (a1 + b1, a2 + b2, a3 + b3, a4 + b4)
end
# Add 1.0 in place to every element of `arr`, four elements per iteration,
# by packing each group into a 4-tuple and summing it with `vadd`.
#
# `arr` must have a length that is a multiple of 4. The loop runs under
# @inbounds, so any other length would touch memory past the end of the
# array; an ArgumentError is thrown up front instead, before `arr` is modified.
# Returns nothing.
function vadd_one!(arr::Array{Float64, 1})
    len = length(arr)
    len % 4 == 0 || throw(ArgumentError("length of arr must be a multiple of 4"))
    one = (1.0, 1.0, 1.0, 1.0)
    @inbounds for i = 1:4:len
        inp = (arr[i], arr[i+1], arr[i+2], arr[i+3])
        out = vadd(inp, one)
        for j = 1:4
            # j is 1-based, so lane j belongs at arr[i + j - 1]: the same slot
            # it was read from. Storing to arr[i + j] would leave arr[i]
            # untouched and write one element past the group (and past the
            # end of the array on the last iteration).
            arr[i + j - 1] = out[j]
        end
    end
    return nothing
end
# Print the LLVM IR generated for vadd_one! specialized on a Float64 vector.
code_llvm(vadd_one!, (Vector{Float64},))
$ julia vadd.jl
; Compiled body of vadd_one!. Per loop iteration it performs two <2 x double>
; loads, two vector fadds with 1.0, and then four extractelement + scalar
; store pairs. Indices below are in units of %jl_value_t as used by the
; getelementptr instructions (the layout of %jl_value_t is not shown here).
define %jl_value_t* @"julia_vadd_one!15119"(%jl_value_t*, %jl_value_t**, i32) {
top:
; %3 is the first pointer stored in the argument vector %1
; (presumably the boxed array argument — confirm against the Julia ABI).
%3 = load %jl_value_t** %1, align 8, !dbg !8
; Load the field at index 2 of %3 (TBAA-tagged as the array length) as an i64.
%4 = getelementptr inbounds %jl_value_t* %3, i64 2, i32 0, !dbg !9
%5 = load %jl_value_t** %4, align 8, !dbg !9, !tbaa %jtbaa_arraylen
%6 = ptrtoint %jl_value_t* %5 to i64, !dbg !9
; Build the range via colon(1, 4, %6); field 2 of the result is used as the trip count.
%7 = call %Range @julia_colon820(i64 1, i64 4, i64 %6), !dbg !14, !julia_type !15
%8 = extractvalue %Range %7, 2, !dbg !14
; Skip the loop entirely when the trip count is less than 1.
%9 = icmp slt i64 %8, 1, !dbg !14
br i1 %9, label %L9, label %pass2.preheader, !dbg !14
pass2.preheader: ; preds = %top
; Loop-invariant values: range field 0 (added as the base in %15), range
; field 1 (multiplied by the iteration counter in %14), and the field at
; index 1 of %3 (TBAA-tagged as the array data pointer).
%10 = extractvalue %Range %7, 0, !dbg !14
%11 = extractvalue %Range %7, 1, !dbg !14
%12 = getelementptr inbounds %jl_value_t* %3, i64 1, i32 0, !dbg !16
%13 = load %jl_value_t** %12, align 8, !dbg !16, !tbaa %jtbaa_arrayptr
br label %pass2, !dbg !14
pass2: ; preds = %pass2, %pass2.preheader
; Iteration counter, starting at 0 and incremented by 1 at the bottom of the loop.
%"#s2.0" = phi i64 [ %39, %pass2 ], [ 0, %pass2.preheader ]
; %15 = counter * field1 + field0, the current value of the range.
%14 = mul i64 %"#s2.0", %11, !dbg !14
%15 = add i64 %14, %10, !dbg !14
; First vector load: two doubles starting at index %15 - 1.
%16 = add i64 %15, -1, !dbg !16
%17 = getelementptr %jl_value_t* %13, i64 %16, !dbg !16
%18 = bitcast %jl_value_t* %17 to <2 x double>*, !dbg !16
%19 = load <2 x double>* %18, align 8, !dbg !16, !tbaa %jtbaa_user
; Second vector load: two doubles starting at index %15 + 1.
%20 = add i64 %15, 1, !dbg !16
%21 = getelementptr %jl_value_t* %13, i64 %20, !dbg !16
%22 = bitcast %jl_value_t* %21 to <2 x double>*, !dbg !16
%23 = load <2 x double>* %22, align 8, !dbg !16, !tbaa %jtbaa_user
; Add 1.0 to both lanes of each loaded vector.
%24 = fadd <2 x double> %19, <double 1.000000e+00, double 1.000000e+00>, !dbg !19
%25 = fadd <2 x double> %23, <double 1.000000e+00, double 1.000000e+00>, !dbg !19
; The results are written back one lane at a time: each lane is extracted
; and stored as a scalar double. The store window (indices %15 .. %15 + 3)
; is shifted by one relative to the load window (%15 - 1 .. %15 + 2).
; Lane 0 of %24 -> index %15.
%26 = extractelement <2 x double> %24, i32 0, !dbg !20
%27 = getelementptr %jl_value_t* %13, i64 %15, !dbg !20
%28 = bitcast %jl_value_t* %27 to double*, !dbg !20
store double %26, double* %28, align 8, !dbg !20, !tbaa %jtbaa_user
; Lane 1 of %24 -> index %15 + 1 (reuses the address %21 computed for the second load).
%29 = extractelement <2 x double> %24, i32 1, !dbg !20
%30 = bitcast %jl_value_t* %21 to double*, !dbg !20
store double %29, double* %30, align 8, !dbg !20, !tbaa %jtbaa_user
; Lane 0 of %25 -> index %15 + 2.
%31 = extractelement <2 x double> %25, i32 0, !dbg !20
%32 = add i64 %15, 2, !dbg !20
%33 = getelementptr %jl_value_t* %13, i64 %32, !dbg !20
%34 = bitcast %jl_value_t* %33 to double*, !dbg !20
store double %31, double* %34, align 8, !dbg !20, !tbaa %jtbaa_user
; Lane 1 of %25 -> index %15 + 3.
%35 = extractelement <2 x double> %25, i32 1, !dbg !20
%36 = add i64 %15, 3, !dbg !20
%37 = getelementptr %jl_value_t* %13, i64 %36, !dbg !20
%38 = bitcast %jl_value_t* %37 to double*, !dbg !20
store double %35, double* %38, align 8, !dbg !20, !tbaa %jtbaa_user
; Advance the counter and loop until it equals the trip count %8.
%39 = add i64 %"#s2.0", 1, !dbg !14
%exitcond = icmp eq i64 %39, %8, !dbg !20
br i1 %exitcond, label %L9, label %pass2, !dbg !20
L9: ; preds = %pass2, %top
; Return a constant pointer value (presumably Julia's `nothing` singleton — confirm).
ret %jl_value_t* inttoptr (i64 13122672 to %jl_value_t*), !dbg !20
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment