Skip to content

Instantly share code, notes, and snippets.

@tshort
Last active August 29, 2015 13:55
Show Gist options
  • Save tshort/8778410 to your computer and use it in GitHub Desktop.
Save tshort/8778410 to your computer and use it in GitHub Desktop.
Playing with an @with macro

@with macro

Here is an @with macro that evaluates an expression against a DataFrame (or even a Dict), where each column is referenced by its quoted symbol (for example, `:a` refers to column `a`).

# Fallback: anything that is neither an expression nor a quoted symbol
# (literals, plain identifiers, line-number nodes, ...) is returned unchanged.
replace_syms(x, membernames) = x

# Current parsers represent a literal `:a` inside an expression as
# `QuoteNode(:a)` rather than `Expr(:quote, :a)`, so it needs its own method.
# The first sighting of a symbol registers a fresh gensym in `membernames`;
# later sightings reuse it.
function replace_syms(q::QuoteNode, membernames)
    isa(q.value, Symbol) || return q
    return get!(gensym, membernames, q.value)
end

"""
    replace_syms(e, membernames)

Return a copy of `e` in which every quoted symbol (`:name`) is replaced by a
generated variable name. `membernames` is mutated: it maps each quoted symbol
encountered to the generated name that now stands in for it, and the same
symbol always maps to the same generated name.
"""
function replace_syms(e::Expr, membernames)
    if e.head == :quote
        # Only a quoted bare symbol is a column reference. Other quoted code
        # (for example a quoted expression) is left untouched instead of being
        # used as a key of the symbol table.
        if length(e.args) == 1 && isa(e.args[1], Symbol)
            return get!(gensym, membernames, e.args[1])
        end
        return e
    elseif e.head == :. && length(e.args) == 2 && isa(e.args[2], QuoteNode)
        # `obj.field` stores the field name as a QuoteNode; that is a field
        # access, not a column reference, so only the object part is rewritten.
        return Expr(:., replace_syms(e.args[1], membernames), e.args[2])
    end
    return Expr(e.head, Any[replace_syms(x, membernames) for x in e.args]...)
end

"""
    with_helper(d, body)

Build the expression that `@with` expands to. Every quoted symbol in `body` is
swapped for a generated argument name; the result defines a function with a
generated name taking those arguments, followed by a call to it in which each
argument is `getindex(d, :symbol)` for the matching symbol.
"""
function with_helper(d, body)
    # Filled in by replace_syms: quoted symbol => generated argument name.
    membernames = Dict{Symbol, Symbol}()
    rewritten = replace_syms(body, membernames)

    # keys and values of the same unmodified Dict are collected in matching order,
    # so the i-th lookup feeds the i-th parameter.
    columns = collect(keys(membernames))
    params = collect(values(membernames))
    lookups = [:( getindex($d, $(Meta.quot(col))) ) for col in columns]

    funname = gensym()
    return :( function $funname($(params...)) $rewritten end; $funname($(lookups...)) )
end

"""
    @with d body

Evaluate `body` with each quoted symbol (`:name`) standing for
`getindex(d, :name)`. The expansion is produced by `with_helper`.
"""
macro with(d, body)
    # Escape the whole expansion so that `d` and the free variables of `body`
    # resolve in the caller's scope. Without this, macro hygiene treats them as
    # globals of the module defining the macro, so a local `d` (such as a
    # function argument) would be ignored. The generated function and argument
    # names come from gensym, so escaping cannot clash with caller names.
    esc(with_helper(d, body))
end

Here are simple examples with a Dict:

# A Dict keyed by symbols; `@with` looks entries up with getindex(d, :key).
# The Dict(...) constructor replaces the `{k => v}` literal, which is no longer
# valid Julia syntax.
d = Dict(:s => 3, :y => 44, :d => 5)
x = @with(d, :s + :y)       # 3 + 44 == 47

# Ordinary variables can be mixed with quoted-symbol references.
t = 3
x = @with d :s + :y + t     # 3 + 44 + 3 == 50

# A multi-statement body: the value of the last expression is the result.
x = @with d begin
    z = t + :y - 1          # 3 + 44 - 1 == 46
    :s + z                  # 3 + 46 == 49
end

Here is the DataFrames example that @johnmyleswhite put together. I added dot9 as an example that uses @with. dot9 runs at comparable speeds to dot8. The timings below are in seconds, as reported by `@elapsed` for a single call of each function.

dot1 => 0.01229682
dot2 => 0.388635147
dot3 => 1.89651897
dot4 => 0.355990775
dot5 => 0.013920764
dot6 => 0.015714847
dot7 => 0.065454129
dot8 => 0.013927748
dot9 => 0.013752586
using DataArrays, DataFrames

# Benchmark inputs shared by the dot1..dot9 timings below.
# Seed the random number generator first so the two vectors are reproducible.
srand(1)
const n = 5_000_000
# Plain vectors, passed to dot1 and dot8.
a = rand(n)
b = rand(n)
# The same data passed to the DataVector-based variants (dot2, dot5, dot6, dot7).
# NOTE(review): presumably `data` wraps a Vector in a DataArray — confirm against DataArrays.
da = data(a)
db = data(b)
# Frame with columns :a and :b, passed to dot3, dot4 and dot9.
df = DataFrame(a = da, b = db)
# Second frame built from the raw vectors, with column names assigned afterwards.
# NOTE(review): df2 is not used by any code visible here.
df2 = DataFrame({a, b})
names!(df2, [:a, :b])

# Add a `values` method for DataArray that returns its `data` field.
# dot6, dot7 and dot9 index into the result of this call.
function Base.values(da::DataArray)
    return da.data
end

"""
    dot1(a::Vector, b::Vector)

Return the sum of `a[i] * b[i]` over the indices of `a`, accumulated from `0.0`.
Baseline variant operating on plain vectors with no missing-value handling.
"""
function dot1(a::Vector, b::Vector)
    acc = 0.0
    for k in eachindex(a)
        acc += a[k] * b[k]
    end
    return acc
end

"""
    dot2(da::DataVector, db::DataVector)

Return the sum of `da[i] * db[i]` over `1:length(da)`, accumulated from `0.0`.
Each element of both vectors is asserted to be of type `eltype(da)` before
it is multiplied.
"""
function dot2(da::DataVector, db::DataVector)
    T = eltype(da)
    total = 0.0
    for k in 1:length(da)
        # The type assertions use da's element type for both operands.
        lhs = da[k]::T
        rhs = db[k]::T
        total += lhs * rhs
    end
    return total
end

"""
    dot3(df::DataFrame)

Return the sum of the element-wise products of columns `:a` and `:b` of `df`,
accumulated from `0.0`. The columns are extracted and looped over inside this
same function.
"""
function dot3(df::DataFrame)
    da = df[:a]
    db = df[:b]
    T = eltype(da)
    total = 0.0
    # The loop is kept inline on purpose: dot4 does the same work by handing
    # the extracted columns to dot2 instead.
    for k in 1:length(da)
        lhs = da[k]::T
        rhs = db[k]::T
        total += lhs * rhs
    end
    return total
end

"""
    dot4(df::DataFrame)

Extract columns `:a` and `:b` from `df` and return `dot2` applied to them.
"""
function dot4(df::DataFrame)
    # Same computation as dot3, but the loop runs inside dot2.
    return dot2(df[:a], df[:b])
end

"""
    dot5(da::DataVector, db::DataVector)

Return the sum of `da.data[i] * db.data[i]` over `1:length(da)`, accumulated
from `0.0`. Elements are read from the `data` field of each vector; no NA
check is made.
"""
function dot5(da::DataVector, db::DataVector)
    total = 0.0
    for k in 1:length(da)
        # The field is read on every iteration, exactly as benchmarked.
        prod_k = da.data[k] * db.data[k]
        total += prod_k
    end
    return total
end

"""
    dot6(da::DataVector, db::DataVector)

Return the sum of `values(da)[i] * values(db)[i]` over `1:length(da)`,
accumulated from `0.0`. Same as `dot5`, except that elements are reached
through `values` rather than the `data` field; no NA check is made.
"""
function dot6(da::DataVector, db::DataVector)
    total = 0.0
    for k in 1:length(da)
        # `values` is called on every iteration, exactly as benchmarked.
        prod_k = values(da)[k] * values(db)[k]
        total += prod_k
    end
    return total
end

"""
    dot7(da::DataVector, db::DataVector)

Return the sum of `values(da)[i] * values(db)[i]` over `1:length(da)`,
accumulated from `0.0`, skipping every index at which either `da` or `db`
is NA.
"""
function dot7(da::DataVector, db::DataVector)
    x = 0.0
    for i in 1:length(da)
        # Both operands must be checked; testing `da` twice would let an NA
        # in `db` contribute whatever value sits in its underlying storage.
        if !(isna(da, i) || isna(db, i))
            x += values(da)[i] * values(db)[i]
        end
    end
    return x
end

"""
    dot8(a::Vector, b::Vector)

Return the sum of `a[i] * b[i]` over the indices of `a`, accumulated from
`0.0`, skipping every index at which either `a[i]` or `b[i]` is NaN.
"""
function dot8(a::Vector, b::Vector)
    x = 0.0
    for i in eachindex(a)
        # Both operands must be checked; testing `a[i]` twice would let a NaN
        # in `b` turn the whole sum into NaN.
        if !(isnan(a[i]) || isnan(b[i]))
            x += a[i] * b[i]
        end
    end
    return x
end

"""
    dot9(df::DataFrame)

Return the sum of `values(:a)[i] * values(:b)[i]` over `1:length(:a)`, where
`:a` and `:b` are looked up through `@with df`. The loop body matches `dot6`.
"""
function dot9(df::DataFrame)
    @with df begin
        total = 0.0
        for k in 1:length(:a)
            total += values(:a)[k] * values(:b)[k]
        end
        # The value of the last expression is the result of the @with block.
        total
    end
end

# Time one call of each variant; @elapsed reports the elapsed time in seconds.
# NOTE(review): no warm-up call is visible here, so each measurement presumably
# includes first-call compilation time — confirm how the published numbers were taken.
t1 = @elapsed dot1(a, b)
t2 = @elapsed dot2(da, db)
t3 = @elapsed dot3(df)
t4 = @elapsed dot4(df)
t5 = @elapsed dot5(da, db)
t6 = @elapsed dot6(da, db)
t7 = @elapsed dot7(da, db)
t8 = @elapsed dot8(a, b)
t9 = @elapsed dot9(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment