Skip to content

Instantly share code, notes, and snippets.

Avatar

Arch D. Robison ArchRobison

  • Nvidia
  • Champaign, IL
View GitHub Profile
@ArchRobison
ArchRobison / session.txt
Last active Aug 28, 2016
Output from trying to install Vectorize.jl on MacOS (El Capitan)
View session.txt
...
x bindings/d/sources/
x bindings/d/sources/main.d
x bindings/d/sources/yeppp/
x bindings/d/sources/yeppp/types.d
x bindings/d/sources/yeppp/math.d
x bindings/d/sources/yeppp/library.d
x bindings/d/sources/yeppp/core.d
INFO: ====== Successfully installed Yeppp! ======
TESTING: abs!(Vector{Float32}, Vector{Complex{Float32}})
@ArchRobison
ArchRobison / Vector.jl-install-attempt.txt
Created Aug 28, 2016
Attempt to install Vectorize.jl on Ubuntu
View Vector.jl-install-attempt.txt
julia> Pkg.clone("http://github.com/rprechelt/Vectorize.jl")
INFO: Initializing package repository /home/archr/.julia/v0.5
INFO: Cloning METADATA from https://github.com/JuliaLang/METADATA.jl
INFO: Cloning Vectorize from http://github.com/rprechelt/Vectorize.jl
INFO: Computing changes...
INFO: No packages to install, update or remove
INFO: Package database updated
julia> Pkg.build("Vectorize")
INFO: Building Vectorize
View matmul.jl
"""
    matmul!(C, A, B, ::Type{Val{N}})

Accumulate the product of the leading `N`-by-`N` blocks of `A` and `B` into `C`:
`C[k, j] += A[k, i] * B[i, j]` for every `i`, `j`, `k` in `1:N`.

`C` is updated in place and is not cleared first, so the caller is responsible
for initialising it (e.g. with zeros) when a plain product is wanted.

# Arguments
- `C`: destination, indexable (read and write) over `1:N, 1:N`.
- `A`, `B`: operands, indexable over `1:N, 1:N`.
- `Val{N}`: the problem size, passed as a type so that `N` is a method
  type parameter rather than a runtime value.

# Returns
`nothing`.

# Throws
- `BoundsError` if `C`, `A` or `B` cannot be indexed over `1:N, 1:N`.
"""
@noinline function matmul!(C, A, B, ::Type{Val{N}}) where {N}
    # The loops below run under `@inbounds`, so validate the full index range
    # once up front instead of risking silent out-of-bounds memory access.
    checkbounds(C, 1:N, 1:N)
    checkbounds(A, 1:N, 1:N)
    checkbounds(B, 1:N, 1:N)
    @inbounds for i in 1:N, j in 1:N
        # The innermost loop varies the first index of `C` and `A`.
        @simd for k in 1:N
            C[k, j] += A[k, i] * B[i, j]
        end
    end
    return nothing
end
n = 1
A = rand(Float64, n, n)
View a.ll
define void @julia_load_array_of_float([4 x float]* %a, [4 x float]* %b, [4 x float]* %c) {
top:
%a_arr = load [4 x float], [4 x float]* %a, align 4
%a0 = extractvalue [4 x float] %a_arr, 0
%a2 = extractvalue [4 x float] %a_arr, 2
%a1 = extractvalue [4 x float] %a_arr, 1
%b_arr = load [4 x float], [4 x float]* %b, align 4
%b0 = extractvalue [4 x float] %b_arr, 0
%b2 = extractvalue [4 x float] %b_arr, 2
%b1 = extractvalue [4 x float] %b_arr, 1
@ArchRobison
ArchRobison / time_immutable.jl
Created Oct 30, 2015
Julia example using Tuples or Immutables that vectorizes if patch http://reviews.llvm.org/D14185 is applied.
View time_immutable.jl
import Base.rand
"""
    Quad(a, b, c, d)

Immutable aggregate of four `Float32` components `a`, `b`, `c` and `d`.

Constructor arguments are converted to `Float32` by the default constructor.
"""
struct Quad
    a::Float32
    b::Float32
    c::Float32
    d::Float32
end
@inline function add(x::Quad, y::Quad)
View example.ll
define void @julia_quod_24255([4 x float]* sret, [4 x float]*, [4 x float]*, [4 x float]*) {
top:
%4 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0
%5 = load float, float* %4, align 4
%6 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0
%7 = load float, float* %6, align 4
%8 = fmul float %5, %7
%9 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1
%10 = load float, float* %9, align 4
%11 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1
View benchmark.jl
# Four-lane single-precision value, represented as a plain tuple.
T = NTuple{4,Float32}

# Additive identity for `T`: every lane is zero.
# NOTE(review): no `import Base.zero` is visible in this excerpt, so whether this
# extends `Base.zero` or defines a separate `zero` cannot be told from here.
zero(::Type{T}) = (0.0f0, 0.0f0, 0.0f0, 0.0f0)

# Lane-wise sum of two `T` values.
function add(a::T, b::T)
    a1, a2, a3, a4 = a
    b1, b2, b3, b4 = b
    return (a1 + b1, a2 + b2, a3 + b3, a4 + b4)
end
function mul( a::T, b::T )
(a[1]*b[1], a[2]*b[2], a[3]*b[3], a[4]*b[4])
@ArchRobison
ArchRobison / demo.jl
Last active Aug 29, 2015
Julia program that demonstrates performance improvement from treating subnormals as zeros.
View demo.jl
# Apply smoothing operator to a, putting result in b.
function relax{T}( b::Vector{T}, a::Vector{T} )
assert(length(a)==length(b))
c = T(0.25)
d = T(0.5)
n = length(b)
b[1] = 1 # Boundary condition
@inbounds for i=2:n-1
b[i] = c*a[i-1]+d*a[i]+c*a[i+1]
end
View gist:1de1aa692f7c9e466004
Index: InstCombine/InstCombineCompares.cpp
===================================================================
--- InstCombine/InstCombineCompares.cpp (revision 242211)
+++ InstCombine/InstCombineCompares.cpp (working copy)
@@ -3687,70 +3687,79 @@
/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
Instruction *LHSI,
Constant *RHSC) {
if (!isa<ConstantFP>(RHSC)) return nullptr;
@ArchRobison
ArchRobison / gist:da1f26b658f8afd91699
Created May 27, 2015
Example LLVM code just before pass LICM runs.
View gist:da1f26b658f8afd91699
; Function Attrs: sspreq
define void @julia_foo_21080(%jl_value_t*, %jl_value_t*) #3 {
top:
%2 = bitcast %jl_value_t* %0 to i8**, !dbg !356
%3 = load i8** %2, align 8, !dbg !356, !tbaa %jtbaa_arrayptr
%4 = bitcast i8* %3 to float*, !dbg !356
%5 = getelementptr inbounds %jl_value_t* %0, i64 3, i32 0, !dbg !356
%6 = bitcast %jl_value_t** %5 to i64*, !dbg !356
%7 = load i64* %6, align 8, !dbg !356, !tbaa %jtbaa_arraysize