Skip to content

Instantly share code, notes, and snippets.

@simonbyrne
Last active August 29, 2015 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simonbyrne/3ebdcc6298b1661be19c to your computer and use it in GitHub Desktop.
Save simonbyrne/3ebdcc6298b1661be19c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Modified version of https://gist.github.com/tshort/9b872f0cd12760d9563d
# * The tuple approach uses `Field{:a}()` instead of `Field(:a)`, including in the expression that computes the loop length.
# * Column `b` is changed to `Int`.
module SimonDataFrame
# Column-oriented data frame whose layout is carried in its type parameters.
# The keyword constructor `DataFrame(; kwds...)` in this module fills them in:
# `N` is the tuple of column names and `D` the tuple of the columns' types.
type DataFrame{N,D}
# the columns themselves, stored in the same order as the names in `N`
data::D
end
# Field-less marker type that carries a column name `s` as a type parameter,
# which makes the name visible to dispatch and to the staged `getindex` methods.
immutable Field{s} end

# Build the `Field` marker for a column name held in a runtime `Symbol`.
function Field(s::Symbol)
    return Field{s}()
end
# Keyword constructor: `DataFrame(a = x, b = y)` stores the given values as a
# tuple of columns and records the column names and the columns' types as the
# type parameters of the resulting `DataFrame`.
function DataFrame(; kwds...)
    # Both comprehensions walk `kwds` in the same order, so names and columns
    # stay aligned position by position.
    colnames = tuple([key for (key, val) in kwds]...)
    columns = tuple([val for (key, val) in kwds]...)
    coltypes = map(typeof, columns)
    return DataFrame{colnames,coltypes}(columns)
end
# `d[Field{:name}()]` -- fetch the whole column called `name`.
# Staged: the body looks up the position of `s` in the name tuple `N` and
# returns the expression `d.data[j]` with that position spliced in as a literal.
# A name that is not present in `N` fails the dictionary lookup with a `KeyError`.
stagedfunction getindex{N,D,s}(d::DataFrame{N,D}, f::Field{s})
    col = Dict(zip(N, 1:length(N)))[s]
    return Expr(:ref, :(d.data), col)
end
# `d[i, Field{:name}()]` -- fetch element `i` of the column called `name`.
# Staged: the body looks up the position of `s` in the name tuple `N` and
# returns the expression `d.data[j][i]` with that position spliced in.
# A name that is not present in `N` fails the dictionary lookup with a `KeyError`.
stagedfunction getindex{N,D,s}(d::DataFrame{N,D}, i::Integer, f::Field{s})
    col = Dict(zip(N, 1:length(N)))[s]
    column = Expr(:ref, :(d.data), col)
    return Expr(:ref, column, :i)
end
# `d[i]` -- fetch row `i` as a tuple holding element `i` of every column.
# Staged: emits one `d.data[col][i]` term per entry of `D`, wrapped in a tuple
# expression.
stagedfunction getindex{N,D}(d::DataFrame{N,D}, i::Integer)
    row = Expr(:tuple)
    for col in 1:length(D)
        push!(row.args, :(d.data[$col][i]))
    end
    return row
end
# Convenience methods that take the column name as a runtime `Symbol`: each one
# wraps the name in a `Field` and forwards to the matching `Field`-based method.
getindex(d::DataFrame, s::Symbol) = d[Field(s)]
# The row index is declared `Integer`, the same as the `Field`-based method this
# forwards to, so non-`Int` indices (e.g. `Int32`) are accepted as well.
getindex(d::DataFrame, i::Integer, s::Symbol) = d[i, Field(s)]
# Benchmark fixture. The RNG is seeded first so the generated columns are
# reproducible; `a` is drawn before `b`, and that order determines their values.
srand(1)
# number of rows in the test frame
const n = 5_000_000
# floating-point column of uniform random draws
a = rand(n)
# integer column: uniform random draws rounded to `Int`
b = round(Int,rand(n))
# frame under test, with columns named `a` and `b`
sdf = DataFrame(a = a, b = b)
# Sum of squares of column `a`. The column is looked up by `Symbol` and then
# indexed, twice per iteration.
function dot1(df::DataFrame)
    total = 0.0
    nrows = length(df[:a])
    for row = 1:nrows
        total = total + df[:a][row] * df[:a][row]
    end
    total
end
# Sum of squares of column `a`. Each element is read through the two-argument
# `df[row, :a]` form with a `Symbol` column name, twice per iteration.
function dot2(df::DataFrame)
    total = 0.0
    nrows = length(df[:a])
    for row = 1:nrows
        total = total + df[row, :a] * df[row, :a]
    end
    total
end
# Sum of squares of column `a`. The column is looked up with a literal
# `Field{:a}()` marker and then indexed, twice per iteration.
function dot3(df::DataFrame)
    total = 0.0
    nrows = length(df[Field{:a}()])
    for row = 1:nrows
        total = total + df[Field{:a}()][row] * df[Field{:a}()][row]
    end
    total
end
# Sum of squares of column `a`. Each element is read through the two-argument
# `df[row, Field{:a}()]` form with a literal `Field` marker, twice per iteration.
function dot4(df::DataFrame)
    total = 0.0
    nrows = length(df[Field{:a}()])
    for row = 1:nrows
        total = total + df[row, Field{:a}()] * df[row, Field{:a}()]
    end
    total
end
# Single element lookup ahead of the benchmarks (presumably a warm-up of the
# staged `getindex`; its result is discarded).
sdf[1,Field(:a)]
# Warm-up: run every kernel once before timing it. The first call of a function
# triggers compilation, which `@elapsed` would otherwise count as run time and
# which would distort the comparison between the four approaches.
dot1(sdf)
dot2(sdf)
dot3(sdf)
dot4(sdf)
# Timed runs; each `@show` prints the elapsed seconds for one kernel.
@show t1 = @elapsed dot1(sdf)
@show t2 = @elapsed dot2(sdf)
@show t3 = @elapsed dot3(sdf)
@show t4 = @elapsed dot4(sdf)
end
module CompositeDataFramesTimings
using DataArrays, DataFrames
using DataFramesMeta ## NOTE: MUST BE THE DEVELOPMENT VERSION
# Benchmark fixture. The RNG is seeded first so the generated columns are
# reproducible; `a` is drawn before `b`, and that order determines their values.
srand(1)
# number of rows in the test frames
const n = 5_000_000
# floating-point column of uniform random draws
a = rand(n)
# integer column: uniform random draws rounded to `Int`
b = round(Int,rand(n))
# composite frame with columns `a` and `b`
cdf = CompositeDataFrame(a = a, b = b)
# `DataFrame` built from the composite frame, timed alongside it below
df = DataFrame(cdf)
# Sum of squares of column `a`. The column is looked up by `Symbol` and then
# indexed, twice per iteration; the row count comes from `size(df, 1)`.
function dot1(df::AbstractDataFrame)
    total = 0.0
    nrows = size(df, 1)
    for row = 1:nrows
        total = total + df[:a][row] * df[:a][row]
    end
    total
end
# Sum of squares of column `a`. Each element is read through the two-argument
# `df[row, :a]` form, twice per iteration; the row count comes from `size(df, 1)`.
function dot2(df::AbstractDataFrame)
    total = 0.0
    nrows = size(df, 1)
    for row = 1:nrows
        total = total + df[row, :a] * df[row, :a]
    end
    total
end
# Warm-up: run each kernel once on each frame before timing. The first call of
# a function for a given argument type triggers compilation, which `@elapsed`
# would otherwise count as run time. The warm-up uses the real `df` and `cdf`
# objects so that the compiled specializations are exactly the ones timed below.
dot1(df)
dot2(df)
dot1(cdf)
dot2(cdf)
# Timed runs; each `@show` prints the elapsed seconds for one kernel/frame pair.
@show c1 = @elapsed dot1(df)
@show c2 = @elapsed dot2(df)
@show c1c = @elapsed dot1(cdf)
@show c2c = @elapsed dot2(cdf)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment