jgoldfar/README.md

## README.md

      
    Raw
  

              README.md
            
          
    I was intrigued by @ViralBShah's announcement on julia-users about Yeppp.jl and had to take a look at possible performance improvements for a project I am working on. His assessment looks about right, based on the two test functions implemented here.
-------
With log:
elapsed in Yeppp: 2.152373e-02
elapsed in Julia, vectorized: 1.557364e-01. Yeppp speedup: 7.235570e+00
elapsed in Julia, devectorized: 1.428047e-01. Yeppp speedup: 6.634756e+00
-------
-------
With sum:
elapsed in Yeppp: 1.164229e-02
elapsed in Julia, vectorized: 4.672740e-03. Yeppp speedup: 4.013590e-01
elapsed in Julia, devectorized: 5.267061e-03. Yeppp speedup: 4.524075e-01
-------

Bummer. I was hoping for something faster to come out of sum... Interestingly enough, Yeppp is faster than devectorized Julia code as well, when it is faster at all. This may have some implications for what I am working on.
### Update

Added a few more test functions (see updated yeppptest.jl) and got the following additional results:
-------
With sumabs2:
elapsed in Yeppp: 1.022824e-02
elapsed in Julia, vectorized: 4.414432e-03. Yeppp speedup: 4.315925e-01
elapsed in Julia, devectorized: 5.337171e-03. Yeppp speedup: 5.218073e-01
-------
-------
With subtract!:
elapsed in Yeppp: 1.667625e-02
elapsed in Julia, vectorized: 2.471381e-02. Yeppp speedup: 1.481977e+00
elapsed in Julia, devectorized: 1.991622e-02. Yeppp speedup: 1.194287e+00
-------
-------
With dot:
elapsed in Yeppp: 1.414469e-02
elapsed in Julia, vectorized: 8.485621e-03. Yeppp speedup: 5.999155e-01
elapsed in Julia, devectorized: 8.085542e-03. Yeppp speedup: 5.716308e-01
-------

Conclusions: Yeppp.jl is slower at sumabs2 (where Julia's vectorized form beats the devectorized loop, which is unsurprising, I guess) and at dot, but faster at subtracting! It is also a testament to Julia's codebase that devectorizing doesn't help that much (though it is still worth it in some cases). Perhaps Yeppp! is worth using, in the cases where it is faster for vectorized operations, just for the sake of writing readable code?
### Getting Yeppp.jl to work

For some reason (most likely laziness and not wanting to spend too much time on this) I wasn't able to get Yeppp! to install correctly as a library on my system. Putting libyeppp.so in the same directory didn't help. So, also attached to this Gist is a modified Yeppp.jl with a hardcoded path to libyeppp (on my system), which is checked as if it had been installed by BinDeps.jl.

Feel free to use this code to see if Yeppp! can help you.
Me: Jonathan (Max) Goldfarb jgoldfar@gmail.com

  
## Yeppp.jl
module Yeppp

# Thin wrappers over a few Yeppp! C routines for `Vector{Float64}` data. Each
# wrapper marshals its arguments into a single `ccall` and raises an error when
# the routine returns a non-zero status. What each routine computes is inferred
# from its C name (e.g. `yepCore_Sum_...`); the wrappers add no arithmetic.

# Define `libname` as a module-level constant holding `path`. The library is
# opened once while the macro is expanded, so an unloadable path is reported
# immediately rather than at the first `ccall`.
macro checked_lib(libname, path)
    (dlopen_e(path) == C_NULL) && error("Unable to load \n\n$libname ($path).")
    quote const $(esc(libname)) = $path end
end

# Load dependencies
@checked_lib libyeppp "/home/jgoldfar/yeppptest/libyeppp.so"

# Raise an error naming the C routine `fname` if `status` is non-zero;
# evaluates to `false` otherwise.
checkstatus(fname, status) = status != 0 && error(fname, ": error: ", status)

# Call `yepLibrary_Init`; Julia runs `__init__` when the module is loaded.
function __init__()
    status = ccall((:yepLibrary_Init, libyeppp), Cint, ())
    checkstatus("yepLibrary_Init", status)
end

# Call `yepLibrary_Release`.
function release()
    status = ccall((:yepLibrary_Release, libyeppp), Cint, ())
    checkstatus("yepLibrary_Release", status)
end


# ---- Reductions -----------------------------------------------------------
# Each reduction hands the C routine a one-element, zero-initialised buffer and
# returns the scalar the routine wrote into it.

# Dot product of `x` and `y`, which must have equal length.
function dot(x::Vector{Float64}, y::Vector{Float64})
    n = length(x)
    @assert length(y) == n
    dotproduct = zeros(Float64, 1)
    status = ccall((:yepCore_DotProduct_V64fV64f_S64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Csize_t),
                   x, y, dotproduct, n)
    checkstatus("yepCore_DotProduct_V64fV64f_S64f", status)
    dotproduct[1]
end

# Result of `yepCore_Sum_V64f_S64f` applied to `v`.
function sum(v::Vector{Float64})
    n = length(v)
    s = zeros(Float64, 1)
    status = ccall((:yepCore_Sum_V64f_S64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Csize_t), v, s, n)
    checkstatus("yepCore_Sum_V64f_S64f", status)
    s[1]
end

# Result of `yepCore_SumAbs_V64f_S64f` applied to `v`.
function sumabs(v::Vector{Float64})
    n = length(v)
    s = zeros(Float64, 1)
    status = ccall((:yepCore_SumAbs_V64f_S64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Csize_t), v, s, n)
    checkstatus("yepCore_SumAbs_V64f_S64f", status)
    s[1]
end

# Result of `yepCore_SumSquares_V64f_S64f` applied to `v`.
function sumabs2(v::Vector{Float64})
    n = length(v)
    s = zeros(Float64, 1)
    status = ccall((:yepCore_SumSquares_V64f_S64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Csize_t), v, s, n)
    checkstatus("yepCore_SumSquares_V64f_S64f", status)
    s[1]
end

# ---- Element-wise binary operations ---------------------------------------
# Signature is `(res, y, x)`, while the C routine receives `(x, y, res, n)`.
# The C routine writes `n` elements into `res`, so all three vectors must have
# the same length; `res` is checked too, otherwise a short output buffer would
# be overrun by the library. Each function returns `res`.

function max!(res::Vector{Float64}, y::Vector{Float64}, x::Vector{Float64})
    n = length(x)
    @assert length(y) == n
    @assert length(res) == n
    status = ccall((:yepCore_Max_V64fV64f_V64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Csize_t),
                   x, y, res, n)
    checkstatus("yepCore_Max_V64fV64f_V64f", status)
    res
end

function min!(res::Vector{Float64}, y::Vector{Float64}, x::Vector{Float64})
    n = length(x)
    @assert length(y) == n
    @assert length(res) == n
    status = ccall((:yepCore_Min_V64fV64f_V64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Csize_t),
                   x, y, res, n)
    checkstatus("yepCore_Min_V64fV64f_V64f", status)
    res
end

function add!(res::Vector{Float64}, y::Vector{Float64}, x::Vector{Float64})
    n = length(x)
    @assert length(y) == n
    @assert length(res) == n
    status = ccall((:yepCore_Add_V64fV64f_V64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Csize_t),
                   x, y, res, n)
    checkstatus("yepCore_Add_V64fV64f_V64f", status)
    res
end

# NOTE(review): the operands reach C as `(x, y)`, i.e. third argument first;
# presumably `res = x - y` -- confirm against the Yeppp! documentation.
function subtract!(res::Vector{Float64}, y::Vector{Float64}, x::Vector{Float64})
    n = length(x)
    @assert length(y) == n
    @assert length(res) == n
    status = ccall((:yepCore_Subtract_V64fV64f_V64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Csize_t),
                   x, y, res, n)
    checkstatus("yepCore_Subtract_V64fV64f_V64f", status)
    res
end

function multiply!(res::Vector{Float64}, y::Vector{Float64}, x::Vector{Float64})
    n = length(x)
    @assert length(y) == n
    @assert length(res) == n
    status = ccall((:yepCore_Multiply_V64fV64f_V64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Csize_t),
                   x, y, res, n)
    checkstatus("yepCore_Multiply_V64fV64f_V64f", status)
    res
end

# ---- Element-wise unary operations ----------------------------------------

# Pass `v` to `yepCore_Negate_IV64f_IV64f` as its only buffer and return `v`.
function negate!(v::Vector{Float64})
    n = length(v)
    status = ccall((:yepCore_Negate_IV64f_IV64f, libyeppp), Cint,
                   (Ptr{Float64}, Csize_t), v, n)
    checkstatus("yepCore_Negate_IV64f_IV64f", status)
    v
end

# The functions below take `(y, x)`: `x` is passed to C as the first buffer and
# `y` as the second. `x` and `y` must have equal length; `y` is returned.

function log!(y::Vector{Float64}, x::Vector{Float64})
    n = length(x)
    @assert length(y) == n
    status = ccall((:yepMath_Log_V64f_V64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Csize_t), x, y, n)
    checkstatus("yepMath_Log_V64f_V64f", status)
    y
end

function exp!(y::Vector{Float64}, x::Vector{Float64})
    n = length(x)
    @assert length(y) == n
    status = ccall((:yepMath_Exp_V64f_V64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Csize_t), x, y, n)
    checkstatus("yepMath_Exp_V64f_V64f", status)
    y
end

function sin!(y::Vector{Float64}, x::Vector{Float64})
    n = length(x)
    @assert length(y) == n
    status = ccall((:yepMath_Sin_V64f_V64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Csize_t), x, y, n)
    checkstatus("yepMath_Sin_V64f_V64f", status)
    y
end

function cos!(y::Vector{Float64}, x::Vector{Float64})
    n = length(x)
    @assert length(y) == n
    status = ccall((:yepMath_Cos_V64f_V64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Csize_t), x, y, n)
    checkstatus("yepMath_Cos_V64f_V64f", status)
    y
end

function tan!(y::Vector{Float64}, x::Vector{Float64})
    n = length(x)
    @assert length(y) == n
    status = ccall((:yepMath_Tan_V64f_V64f, libyeppp), Cint,
                   (Ptr{Float64}, Ptr{Float64}, Csize_t), x, y, n)
    checkstatus("yepMath_Tan_V64f_V64f", status)
    y
end

end # module

## yeppptest.jl
include(joinpath(dirname(@__FILE__), "Yeppp.jl"))

# Time an element-wise logarithm of `ntest` random values three ways: through
# `Yeppp.log!`, through the vectorized `log(x)`, and through an explicit loop.
# Every timed block repeats its operation `nsamples` times. The report prints
# the mean seconds per repetition and each Julia time divided by the Yeppp time.
function test_log_devec(;ntest::Int = 10^7, nsamples::Int = 1)
    x = rand(ntest)
    y = zeros(ntest)

    # Run both library paths once on a one-element vector before timing, so
    # first-call compilation is not charged to the first timed repetition.
    warm = rand(1)
    Yeppp.log!(zeros(1), warm)
    log(warm)

    t1 = @elapsed begin
        for i in 1:nsamples
            Yeppp.log!(y, x)
        end
    end
    gc()
    t2 = @elapsed begin
        for i in 1:nsamples
            y = log(x)
        end
    end
    gc()
    t3 = @elapsed begin
        for i in 1:nsamples
            for j in 1:ntest
                y[j] = log(x[j])
            end
        end
    end
    gc()

    @printf "-------\nWith log:\n"
    @printf "elapsed in Yeppp: %e\n" (t1 / nsamples)
    @printf "elapsed in Julia, vectorized: %e. Yeppp speedup: %e\n" (t2 / nsamples) (t2 / t1)
    @printf "elapsed in Julia, devectorized: %e. Yeppp speedup: %e\n-------\n" (t3 / nsamples) (t3 / t1)
end
test_log_devec(nsamples = 2)

# Time the sum of `ntest` random values three ways: `Yeppp.sum`, the built-in
# `sum`, and an explicit accumulation loop. Every timed block repeats its
# operation `nsamples` times. The report prints the mean seconds per repetition
# and each Julia time divided by the Yeppp time.
function test_sum_devec(;ntest::Int = 10^7, nsamples::Int = 1)
    x = rand(ntest)
    y = 0.0

    # Run both library paths once on a one-element vector before timing, so
    # first-call compilation is not charged to the first timed repetition.
    warm = rand(1)
    Yeppp.sum(warm)
    sum(warm)

    t1 = @elapsed begin
        for i in 1:nsamples
            y = Yeppp.sum(x)
        end
    end
    gc()
    t2 = @elapsed begin
        for i in 1:nsamples
            y = sum(x)
        end
    end
    gc()
    t3 = @elapsed begin
        for i in 1:nsamples
            y = 0.0  # reset the accumulator for this repetition
            for j in 1:ntest
                y += x[j]
            end
        end
    end
    gc()

    @printf "-------\nWith sum:\n"
    @printf "elapsed in Yeppp: %e\n" (t1 / nsamples)
    @printf "elapsed in Julia, vectorized: %e. Yeppp speedup: %e\n" (t2 / nsamples) (t2 / t1)
    @printf "elapsed in Julia, devectorized: %e. Yeppp speedup: %e\n-------\n" (t3 / nsamples) (t3 / t1)
end
test_sum_devec(nsamples = 2)


# Time the sum of squares of `ntest` random values three ways: `Yeppp.sumabs2`,
# the built-in `sumabs2`, and an explicit loop accumulating `abs2(x[j])`. Every
# timed block repeats its operation `nsamples` times. The report prints the mean
# seconds per repetition and each Julia time divided by the Yeppp time.
function test_sumabs2_devec(;ntest::Int = 10^7, nsamples::Int = 1)
    x = rand(ntest)
    y = 0.0

    # Run both library paths once on a one-element vector before timing, so
    # first-call compilation is not charged to the first timed repetition.
    warm = rand(1)
    Yeppp.sumabs2(warm)
    sumabs2(warm)

    t1 = @elapsed begin
        for i in 1:nsamples
            y = Yeppp.sumabs2(x)
        end
    end
    gc()
    t2 = @elapsed begin
        for i in 1:nsamples
            y = sumabs2(x)
        end
    end
    gc()
    t3 = @elapsed begin
        for i in 1:nsamples
            y = 0.0  # reset the accumulator for this repetition
            for j in 1:ntest
                y += abs2(x[j])
            end
        end
    end
    gc()

    @printf "-------\nWith sumabs2:\n"
    @printf "elapsed in Yeppp: %e\n" (t1 / nsamples)
    @printf "elapsed in Julia, vectorized: %e. Yeppp speedup: %e\n" (t2 / nsamples) (t2 / t1)
    @printf "elapsed in Julia, devectorized: %e. Yeppp speedup: %e\n-------\n" (t3 / nsamples) (t3 / t1)
end
test_sumabs2_devec(nsamples = 2)


# Time an element-wise subtraction of two `ntest`-element random vectors three
# ways: `Yeppp.subtract!`, the vectorized `x2 - x1`, and an explicit loop. Every
# timed block repeats its operation `nsamples` times. The report prints the mean
# seconds per repetition and each Julia time divided by the Yeppp time.
function test_subtract_devec(;ntest::Int = 10^7, nsamples::Int = 1)
    x1 = rand(ntest)
    x2 = rand(ntest)
    y = zeros(ntest)

    # Run both library paths once on one-element vectors before timing, so
    # first-call compilation is not charged to the first timed repetition.
    warm1 = rand(1)
    warm2 = rand(1)
    Yeppp.subtract!(zeros(1), warm1, warm2)
    warm2 - warm1

    t1 = @elapsed begin
        for i in 1:nsamples
            Yeppp.subtract!(y, x1, x2)
        end
    end
    gc()
    t2 = @elapsed begin
        for i in 1:nsamples
            y = x2 - x1
        end
    end
    gc()
    t3 = @elapsed begin
        for i in 1:nsamples
            for j in 1:ntest
                y[j] = x2[j] - x1[j]
            end
        end
    end
    gc()

    @printf "-------\nWith subtract!:\n"
    @printf "elapsed in Yeppp: %e\n" (t1 / nsamples)
    @printf "elapsed in Julia, vectorized: %e. Yeppp speedup: %e\n" (t2 / nsamples) (t2 / t1)
    @printf "elapsed in Julia, devectorized: %e. Yeppp speedup: %e\n-------\n" (t3 / nsamples) (t3 / t1)
end
test_subtract_devec(nsamples = 2)


# Time the dot product of two `ntest`-element random vectors three ways:
# `Yeppp.dot`, the built-in `dot`, and an explicit multiply-accumulate loop.
# Every timed block repeats its operation `nsamples` times. The report prints
# the mean seconds per repetition and each Julia time divided by the Yeppp time.
function test_dot_devec(;ntest::Int = 10^7, nsamples::Int = 1)
    x1 = rand(ntest)
    x2 = rand(ntest)

    # Run both library paths once on one-element vectors before timing, so
    # first-call compilation is not charged to the first timed repetition.
    warm1 = rand(1)
    warm2 = rand(1)
    Yeppp.dot(warm1, warm2)
    dot(warm2, warm1)

    t1 = @elapsed begin
        for i in 1:nsamples
            y = Yeppp.dot(x1, x2)
        end
    end
    gc()
    t2 = @elapsed begin
        for i in 1:nsamples
            y = dot(x2, x1)
        end
    end
    gc()
    t3 = @elapsed begin
        for i in 1:nsamples
            y = 0.0  # reset the accumulator for this repetition
            for j in 1:ntest
                y += x2[j] * x1[j]
            end
        end
    end
    gc()

    @printf "-------\nWith dot:\n"
    @printf "elapsed in Yeppp: %e\n" (t1 / nsamples)
    @printf "elapsed in Julia, vectorized: %e. Yeppp speedup: %e\n" (t2 / nsamples) (t2 / t1)
    @printf "elapsed in Julia, devectorized: %e. Yeppp speedup: %e\n-------\n" (t3 / nsamples) (t3 / t1)
end
test_dot_devec(nsamples = 2)
	module Yeppp

	# Thin wrappers over a few Yeppp! C routines for `Vector{Float64}` data. Each
	# wrapper marshals its arguments into a single `ccall` and raises an error when
	# the routine returns a non-zero status. What each routine computes is inferred
	# from its C name (e.g. `yepCore_Sum_...`); the wrappers add no arithmetic.

	# Define `libname` as a module-level constant holding `path`. The library is
	# opened once while the macro is expanded, so an unloadable path is reported
	# immediately rather than at the first `ccall`.
	macro checked_lib(libname, path)
	    (dlopen_e(path) == C_NULL) && error("Unable to load \n\n$libname ($path).")
	    quote const $(esc(libname)) = $path end
	end

	# Load dependencies
	@checked_lib libyeppp "/home/jgoldfar/yeppptest/libyeppp.so"

	# Raise an error naming the C routine `fname` if `status` is non-zero;
	# evaluates to `false` otherwise.
	checkstatus(fname, status) = status != 0 && error(fname, ": error: ", status)

	# Call `yepLibrary_Init`; Julia runs `__init__` when the module is loaded.
	function __init__()
	    status = ccall((:yepLibrary_Init, libyeppp), Cint, ())
	    checkstatus("yepLibrary_Init", status)
	end

	# Call `yepLibrary_Release`.
	function release()
	    status = ccall((:yepLibrary_Release, libyeppp), Cint, ())
	    checkstatus("yepLibrary_Release", status)
	end


	# ---- Reductions -----------------------------------------------------------
	# Each reduction hands the C routine a one-element, zero-initialised buffer and
	# returns the scalar the routine wrote into it.

	# Dot product of `x` and `y`, which must have equal length.
	function dot(x::Vector{Float64}, y::Vector{Float64})
	    n = length(x)
	    @assert length(y) == n
	    dotproduct = zeros(Float64, 1)
	    status = ccall((:yepCore_DotProduct_V64fV64f_S64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Csize_t),
	                   x, y, dotproduct, n)
	    checkstatus("yepCore_DotProduct_V64fV64f_S64f", status)
	    dotproduct[1]
	end

	# Result of `yepCore_Sum_V64f_S64f` applied to `v`.
	function sum(v::Vector{Float64})
	    n = length(v)
	    s = zeros(Float64, 1)
	    status = ccall((:yepCore_Sum_V64f_S64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Csize_t), v, s, n)
	    checkstatus("yepCore_Sum_V64f_S64f", status)
	    s[1]
	end

	# Result of `yepCore_SumAbs_V64f_S64f` applied to `v`.
	function sumabs(v::Vector{Float64})
	    n = length(v)
	    s = zeros(Float64, 1)
	    status = ccall((:yepCore_SumAbs_V64f_S64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Csize_t), v, s, n)
	    checkstatus("yepCore_SumAbs_V64f_S64f", status)
	    s[1]
	end

	# Result of `yepCore_SumSquares_V64f_S64f` applied to `v`.
	function sumabs2(v::Vector{Float64})
	    n = length(v)
	    s = zeros(Float64, 1)
	    status = ccall((:yepCore_SumSquares_V64f_S64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Csize_t), v, s, n)
	    checkstatus("yepCore_SumSquares_V64f_S64f", status)
	    s[1]
	end

	# ---- Element-wise binary operations ---------------------------------------
	# Signature is `(res, y, x)`, while the C routine receives `(x, y, res, n)`.
	# The C routine writes `n` elements into `res`, so all three vectors must have
	# the same length; `res` is checked too, otherwise a short output buffer would
	# be overrun by the library. Each function returns `res`.

	function max!(res::Vector{Float64}, y::Vector{Float64}, x::Vector{Float64})
	    n = length(x)
	    @assert length(y) == n
	    @assert length(res) == n
	    status = ccall((:yepCore_Max_V64fV64f_V64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Csize_t),
	                   x, y, res, n)
	    checkstatus("yepCore_Max_V64fV64f_V64f", status)
	    res
	end

	function min!(res::Vector{Float64}, y::Vector{Float64}, x::Vector{Float64})
	    n = length(x)
	    @assert length(y) == n
	    @assert length(res) == n
	    status = ccall((:yepCore_Min_V64fV64f_V64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Csize_t),
	                   x, y, res, n)
	    checkstatus("yepCore_Min_V64fV64f_V64f", status)
	    res
	end

	function add!(res::Vector{Float64}, y::Vector{Float64}, x::Vector{Float64})
	    n = length(x)
	    @assert length(y) == n
	    @assert length(res) == n
	    status = ccall((:yepCore_Add_V64fV64f_V64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Csize_t),
	                   x, y, res, n)
	    checkstatus("yepCore_Add_V64fV64f_V64f", status)
	    res
	end

	# NOTE(review): the operands reach C as `(x, y)`, i.e. third argument first;
	# presumably `res = x - y` -- confirm against the Yeppp! documentation.
	function subtract!(res::Vector{Float64}, y::Vector{Float64}, x::Vector{Float64})
	    n = length(x)
	    @assert length(y) == n
	    @assert length(res) == n
	    status = ccall((:yepCore_Subtract_V64fV64f_V64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Csize_t),
	                   x, y, res, n)
	    checkstatus("yepCore_Subtract_V64fV64f_V64f", status)
	    res
	end

	function multiply!(res::Vector{Float64}, y::Vector{Float64}, x::Vector{Float64})
	    n = length(x)
	    @assert length(y) == n
	    @assert length(res) == n
	    status = ccall((:yepCore_Multiply_V64fV64f_V64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Csize_t),
	                   x, y, res, n)
	    checkstatus("yepCore_Multiply_V64fV64f_V64f", status)
	    res
	end

	# ---- Element-wise unary operations ----------------------------------------

	# Pass `v` to `yepCore_Negate_IV64f_IV64f` as its only buffer and return `v`.
	function negate!(v::Vector{Float64})
	    n = length(v)
	    status = ccall((:yepCore_Negate_IV64f_IV64f, libyeppp), Cint,
	                   (Ptr{Float64}, Csize_t), v, n)
	    checkstatus("yepCore_Negate_IV64f_IV64f", status)
	    v
	end

	# The functions below take `(y, x)`: `x` is passed to C as the first buffer and
	# `y` as the second. `x` and `y` must have equal length; `y` is returned.

	function log!(y::Vector{Float64}, x::Vector{Float64})
	    n = length(x)
	    @assert length(y) == n
	    status = ccall((:yepMath_Log_V64f_V64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Csize_t), x, y, n)
	    checkstatus("yepMath_Log_V64f_V64f", status)
	    y
	end

	function exp!(y::Vector{Float64}, x::Vector{Float64})
	    n = length(x)
	    @assert length(y) == n
	    status = ccall((:yepMath_Exp_V64f_V64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Csize_t), x, y, n)
	    checkstatus("yepMath_Exp_V64f_V64f", status)
	    y
	end

	function sin!(y::Vector{Float64}, x::Vector{Float64})
	    n = length(x)
	    @assert length(y) == n
	    status = ccall((:yepMath_Sin_V64f_V64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Csize_t), x, y, n)
	    checkstatus("yepMath_Sin_V64f_V64f", status)
	    y
	end

	function cos!(y::Vector{Float64}, x::Vector{Float64})
	    n = length(x)
	    @assert length(y) == n
	    status = ccall((:yepMath_Cos_V64f_V64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Csize_t), x, y, n)
	    checkstatus("yepMath_Cos_V64f_V64f", status)
	    y
	end

	function tan!(y::Vector{Float64}, x::Vector{Float64})
	    n = length(x)
	    @assert length(y) == n
	    status = ccall((:yepMath_Tan_V64f_V64f, libyeppp), Cint,
	                   (Ptr{Float64}, Ptr{Float64}, Csize_t), x, y, n)
	    checkstatus("yepMath_Tan_V64f_V64f", status)
	    y
	end

	end # module
	include(joinpath(dirname(@__FILE__), "Yeppp.jl"))

	# Time an element-wise logarithm of `ntest` random values three ways: through
	# `Yeppp.log!`, through the vectorized `log(x)`, and through an explicit loop.
	# Every timed block repeats its operation `nsamples` times. The report prints
	# the mean seconds per repetition and each Julia time divided by the Yeppp time.
	function test_log_devec(;ntest::Int = 10^7, nsamples::Int = 1)
	    x = rand(ntest)
	    y = zeros(ntest)

	    # Run both library paths once on a one-element vector before timing, so
	    # first-call compilation is not charged to the first timed repetition.
	    warm = rand(1)
	    Yeppp.log!(zeros(1), warm)
	    log(warm)

	    t1 = @elapsed begin
	        for i in 1:nsamples
	            Yeppp.log!(y, x)
	        end
	    end
	    gc()
	    t2 = @elapsed begin
	        for i in 1:nsamples
	            y = log(x)
	        end
	    end
	    gc()
	    t3 = @elapsed begin
	        for i in 1:nsamples
	            for j in 1:ntest
	                y[j] = log(x[j])
	            end
	        end
	    end
	    gc()

	    @printf "-------\nWith log:\n"
	    @printf "elapsed in Yeppp: %e\n" (t1 / nsamples)
	    @printf "elapsed in Julia, vectorized: %e. Yeppp speedup: %e\n" (t2 / nsamples) (t2 / t1)
	    @printf "elapsed in Julia, devectorized: %e. Yeppp speedup: %e\n-------\n" (t3 / nsamples) (t3 / t1)
	end
	test_log_devec(nsamples = 2)

	# Time the sum of `ntest` random values three ways: `Yeppp.sum`, the built-in
	# `sum`, and an explicit accumulation loop. Every timed block repeats its
	# operation `nsamples` times. The report prints the mean seconds per repetition
	# and each Julia time divided by the Yeppp time.
	function test_sum_devec(;ntest::Int = 10^7, nsamples::Int = 1)
	    x = rand(ntest)
	    y = 0.0

	    # Run both library paths once on a one-element vector before timing, so
	    # first-call compilation is not charged to the first timed repetition.
	    warm = rand(1)
	    Yeppp.sum(warm)
	    sum(warm)

	    t1 = @elapsed begin
	        for i in 1:nsamples
	            y = Yeppp.sum(x)
	        end
	    end
	    gc()
	    t2 = @elapsed begin
	        for i in 1:nsamples
	            y = sum(x)
	        end
	    end
	    gc()
	    t3 = @elapsed begin
	        for i in 1:nsamples
	            y = 0.0  # reset the accumulator for this repetition
	            for j in 1:ntest
	                y += x[j]
	            end
	        end
	    end
	    gc()

	    @printf "-------\nWith sum:\n"
	    @printf "elapsed in Yeppp: %e\n" (t1 / nsamples)
	    @printf "elapsed in Julia, vectorized: %e. Yeppp speedup: %e\n" (t2 / nsamples) (t2 / t1)
	    @printf "elapsed in Julia, devectorized: %e. Yeppp speedup: %e\n-------\n" (t3 / nsamples) (t3 / t1)
	end
	test_sum_devec(nsamples = 2)


	# Time the sum of squares of `ntest` random values three ways: `Yeppp.sumabs2`,
	# the built-in `sumabs2`, and an explicit loop accumulating `abs2(x[j])`. Every
	# timed block repeats its operation `nsamples` times. The report prints the mean
	# seconds per repetition and each Julia time divided by the Yeppp time.
	function test_sumabs2_devec(;ntest::Int = 10^7, nsamples::Int = 1)
	    x = rand(ntest)
	    y = 0.0

	    # Run both library paths once on a one-element vector before timing, so
	    # first-call compilation is not charged to the first timed repetition.
	    warm = rand(1)
	    Yeppp.sumabs2(warm)
	    sumabs2(warm)

	    t1 = @elapsed begin
	        for i in 1:nsamples
	            y = Yeppp.sumabs2(x)
	        end
	    end
	    gc()
	    t2 = @elapsed begin
	        for i in 1:nsamples
	            y = sumabs2(x)
	        end
	    end
	    gc()
	    t3 = @elapsed begin
	        for i in 1:nsamples
	            y = 0.0  # reset the accumulator for this repetition
	            for j in 1:ntest
	                y += abs2(x[j])
	            end
	        end
	    end
	    gc()

	    @printf "-------\nWith sumabs2:\n"
	    @printf "elapsed in Yeppp: %e\n" (t1 / nsamples)
	    @printf "elapsed in Julia, vectorized: %e. Yeppp speedup: %e\n" (t2 / nsamples) (t2 / t1)
	    @printf "elapsed in Julia, devectorized: %e. Yeppp speedup: %e\n-------\n" (t3 / nsamples) (t3 / t1)
	end
	test_sumabs2_devec(nsamples = 2)


	# Time an element-wise subtraction of two `ntest`-element random vectors three
	# ways: `Yeppp.subtract!`, the vectorized `x2 - x1`, and an explicit loop. Every
	# timed block repeats its operation `nsamples` times. The report prints the mean
	# seconds per repetition and each Julia time divided by the Yeppp time.
	function test_subtract_devec(;ntest::Int = 10^7, nsamples::Int = 1)
	    x1 = rand(ntest)
	    x2 = rand(ntest)
	    y = zeros(ntest)

	    # Run both library paths once on one-element vectors before timing, so
	    # first-call compilation is not charged to the first timed repetition.
	    warm1 = rand(1)
	    warm2 = rand(1)
	    Yeppp.subtract!(zeros(1), warm1, warm2)
	    warm2 - warm1

	    t1 = @elapsed begin
	        for i in 1:nsamples
	            Yeppp.subtract!(y, x1, x2)
	        end
	    end
	    gc()
	    t2 = @elapsed begin
	        for i in 1:nsamples
	            y = x2 - x1
	        end
	    end
	    gc()
	    t3 = @elapsed begin
	        for i in 1:nsamples
	            for j in 1:ntest
	                y[j] = x2[j] - x1[j]
	            end
	        end
	    end
	    gc()

	    @printf "-------\nWith subtract!:\n"
	    @printf "elapsed in Yeppp: %e\n" (t1 / nsamples)
	    @printf "elapsed in Julia, vectorized: %e. Yeppp speedup: %e\n" (t2 / nsamples) (t2 / t1)
	    @printf "elapsed in Julia, devectorized: %e. Yeppp speedup: %e\n-------\n" (t3 / nsamples) (t3 / t1)
	end
	test_subtract_devec(nsamples = 2)


	# Time the dot product of two `ntest`-element random vectors three ways:
	# `Yeppp.dot`, the built-in `dot`, and an explicit multiply-accumulate loop.
	# Every timed block repeats its operation `nsamples` times. The report prints
	# the mean seconds per repetition and each Julia time divided by the Yeppp time.
	function test_dot_devec(;ntest::Int = 10^7, nsamples::Int = 1)
	    x1 = rand(ntest)
	    x2 = rand(ntest)

	    # Run both library paths once on one-element vectors before timing, so
	    # first-call compilation is not charged to the first timed repetition.
	    warm1 = rand(1)
	    warm2 = rand(1)
	    Yeppp.dot(warm1, warm2)
	    dot(warm2, warm1)

	    t1 = @elapsed begin
	        for i in 1:nsamples
	            y = Yeppp.dot(x1, x2)
	        end
	    end
	    gc()
	    t2 = @elapsed begin
	        for i in 1:nsamples
	            y = dot(x2, x1)
	        end
	    end
	    gc()
	    t3 = @elapsed begin
	        for i in 1:nsamples
	            y = 0.0  # reset the accumulator for this repetition
	            for j in 1:ntest
	                y += x2[j] * x1[j]
	            end
	        end
	    end
	    gc()

	    @printf "-------\nWith dot:\n"
	    @printf "elapsed in Yeppp: %e\n" (t1 / nsamples)
	    @printf "elapsed in Julia, vectorized: %e. Yeppp speedup: %e\n" (t2 / nsamples) (t2 / t1)
	    @printf "elapsed in Julia, devectorized: %e. Yeppp speedup: %e\n-------\n" (t3 / nsamples) (t3 / t1)
	end
	test_dot_devec(nsamples = 2)