Skip to content

Instantly share code, notes, and snippets.

@toivoh
Last active August 29, 2015 14:08
Show Gist options
  • Save toivoh/3cd3481dc66d9224811f to your computer and use it in GitHub Desktop.
Save toivoh/3cd3481dc66d9224811f to your computer and use it in GitHub Desktop.
Trying out the SLP vectorizer with Julia
# Small kernels for checking whether LLVM's SLP vectorizer merges two adjacent
# read-modify-write (XOR) operations into a single vector operation.
# All four functions do the same thing: dest[1] $= src[1]; dest[2] $= src[2].
# They differ only in HOW memory is read and written (raw pointers vs. `@inbounds`
# array indexing), so that the generated native code can be compared.
# The statement shape of each function is the thing under test; keep it as is.
module TestSLP
# rmw!: raw-pointer variant.
# XORs elements 1 and 2 read from `src` into elements 1 and 2 at `dest`.
# `unsafe_load`/`unsafe_store!` perform no validity checks, so both pointers must
# refer to at least two readable (and, for `dest`, writable) elements of type `T`.
# Returns `nothing`.
function rmw!{T}(dest::Ptr{T}, src::Ptr{T})
# All four loads are issued before either store.
s1 = unsafe_load(src, 1)
s2 = unsafe_load(src, 2)
d1 = unsafe_load(dest, 1)
d2 = unsafe_load(dest, 2)
# `$` is bitwise XOR in the Julia version this was written for
# (it shows up as `xor` in the LLVM IR).
d1 $= s1
d2 $= s2
unsafe_store!(dest, d1, 1)
unsafe_store!(dest, d2, 2)
nothing
end
# rmw2!: same operation as `rmw!`, but takes `Vector`s and obtains raw pointers to
# their data with `pointer`. All loads and stores go through those pointers, so no
# bounds checks are involved; both vectors need at least two elements.
# In the accompanying discussion this variant was reported to vectorize with
# LLVM 3.5.0 and the adr/slpvector branch.
# Returns `nothing`.
function rmw2!{T}(dest::Vector{T}, src::Vector{T})
psrc, pdest = pointer(src), pointer(dest)
s1 = unsafe_load(psrc, 1)
s2 = unsafe_load(psrc, 2)
d1 = unsafe_load(pdest, 1)
d2 = unsafe_load(pdest, 2)
d1 $= s1
d2 $= s2
unsafe_store!(pdest, d1, 1)
unsafe_store!(pdest, d2, 2)
nothing
end
# rmw2b!: hybrid variant. Loads use `@inbounds` array indexing (as in `rmw3!`),
# stores use `unsafe_store!` on a raw pointer (as in `rmw2!`). It exists to tell
# apart whether the loads or the stores prevent vectorization; it was reported to
# vectorize, which points at the indexed stores in `rmw3!`.
# Both vectors need at least two elements. Returns `nothing`.
function rmw2b!{T}(dest::Vector{T}, src::Vector{T})
@inbounds s1 = src[1]
@inbounds s2 = src[2]
@inbounds d1 = dest[1]
@inbounds d2 = dest[2]
d1 $= s1
d2 $= s2
# The pointer is taken only after the loads, right before the stores.
pdest = pointer(dest)
unsafe_store!(pdest, d1, 1)
unsafe_store!(pdest, d2, 2)
nothing
end
# rmw3!: pure array-indexing variant. Every load and store is an `@inbounds`
# indexing operation, so bounds checks are skipped; both vectors need at least two
# elements. In the accompanying discussion this variant was reported NOT to
# vectorize with LLVM 3.5.0 and the adr/slpvector branch.
# Returns `nothing`.
function rmw3!{T}(dest::Vector{T}, src::Vector{T})
@inbounds s1 = src[1]
@inbounds s2 = src[2]
@inbounds d1 = dest[1]
@inbounds d2 = dest[2]
d1 $= s1
d2 $= s2
@inbounds dest[1] = d1
@inbounds dest[2] = d2
nothing
end
# Driver: runs when the module is loaded. For each variant, print a header and then
# the native code generated for the `Uint64` specialization, so the outputs can be
# compared side by side.
println("\n\nrmw!: ")
code_native(rmw!, (Ptr{Uint64}, Ptr{Uint64}))
println("\n\nrmw2!: ")
code_native(rmw2!, (Vector{Uint64}, Vector{Uint64}))
println("\n\nrmw2b!: ")
code_native(rmw2b!, (Vector{Uint64}, Vector{Uint64}))
println("\n\nrmw3!: ")
code_native(rmw3!, (Vector{Uint64}, Vector{Uint64}))
end # module
@ArchRobison
Copy link

Using LLVM 3.5.0 with branch adr/slpvector appears to vectorize the second example:

julia> code_native(rmw2!, (Vector{Uint64}, Vector{Uint64}))
        .text
Filename: slp.jl
Source line: 0
        push    rbp
        mov     rbp, rsp
Source line: 14
        mov     rax, qword ptr [rsi + 8]
        mov     rcx, qword ptr [rdi + 8]
Source line: 17
        vmovups xmm0, xmmword ptr [rcx]
Source line: 19
        vxorps  xmm0, xmm0, xmmword ptr [rax]
Source line: 21
        vmovups xmmword ptr [rcx], xmm0
Source line: 23
        pop     rbp
        ret

The third example fails to vectorize with LLVM 3.5.0 and the same branch. It doesn't appear to be an alias issue, since the loads/stores have the right tbaa tags:

julia> code_llvm(rmw3!, (Vector{Uint64}, Vector{Uint64}))

define void @"julia_rmw3!40180"(%jl_value_t*, %jl_value_t*) {
top:
  %2 = getelementptr inbounds %jl_value_t* %1, i64 1, !dbg !8
  %3 = bitcast %jl_value_t* %2 to i8**, !dbg !8
  %4 = load i8** %3, align 8, !dbg !8, !tbaa %jtbaa_arrayptr
  %5 = bitcast i8* %4 to i64*, !dbg !8
  %6 = load i64* %5, align 8, !dbg !8, !tbaa %jtbaa_user, !julia_type !15
  %7 = getelementptr i8* %4, i64 8, !dbg !16
  %8 = bitcast i8* %7 to i64*, !dbg !16
  %9 = load i64* %8, align 8, !dbg !16, !tbaa %jtbaa_user, !julia_type !15
  %10 = getelementptr inbounds %jl_value_t* %0, i64 1, !dbg !17
  %11 = bitcast %jl_value_t* %10 to i8**, !dbg !17
  %12 = load i8** %11, align 8, !dbg !17, !tbaa %jtbaa_arrayptr
  %13 = bitcast i8* %12 to i64*, !dbg !17
  %14 = load i64* %13, align 8, !dbg !17, !tbaa %jtbaa_user, !julia_type !15
  %15 = getelementptr i8* %12, i64 8, !dbg !18
  %16 = bitcast i8* %15 to i64*, !dbg !18
  %17 = load i64* %16, align 8, !dbg !18, !tbaa %jtbaa_user, !julia_type !15
  %18 = xor i64 %14, %6, !dbg !19, !julia_type !15
  %19 = xor i64 %17, %9, !dbg !20, !julia_type !15
  store i64 %18, i64* %13, align 8, !dbg !21, !tbaa %jtbaa_user
  %20 = load i8** %11, align 8, !dbg !22, !tbaa %jtbaa_arrayptr
  %21 = getelementptr i8* %20, i64 8, !dbg !22
  %22 = bitcast i8* %21 to i64*, !dbg !22
  store i64 %19, i64* %22, align 8, !dbg !22, !tbaa %jtbaa_user
  ret void, !dbg !23
}

I'll poke around some more.

@toivoh
Copy link
Author

toivoh commented Nov 11, 2014

Nice! I don't understand the tbaa tags yet, perhaps I'll read up on them.

@toivoh
Copy link
Author

toivoh commented Nov 11, 2014

The problem seems to have something to do with the stores, though: with LLVM 3.5.0 and branch adr/slpvector I can also vectorize rmw2b! (just added above), which uses @inbounds for the loads but unsafe_store! for the stores. I tried to look at the difference in code_llvm output between rmw2b! and rmw3!, but the main difference I see is that the first has been vectorized :) Maybe there should be a version of code_llvm that gives you the LLVM code before optimization.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment