Skip to content

Instantly share code, notes, and snippets.

@tshort
Created April 10, 2013 17:44
Show Gist options
  • Save tshort/5356819 to your computer and use it in GitHub Desktop.
Save tshort/5356819 to your computer and use it in GitHub Desktop.
Explorations with column keys for DataFrames
importall Base
using DataFrames
# Composite key: wraps an AbstractDataFrame whose first row supplies the key
# values, one per column (the methods on IX only ever read row 1).
type IX
    df::AbstractDataFrame
end

# Convenience constructor: gathers the arguments into a row (`[args...]'`)
# and wraps the DataFrame built from it, e.g. IX("j", "G").
function IX(args...)
    row = [args...]'
    return IX(DataFrame(row))
end

# An IX always reports length 1.
# NOTE(review): presumably so that elementwise operators such as `.==` treat a
# key as a scalar -- confirm.
function length(x::IX)
    return 1
end
# Presents the rows of a DataFrame as a one-dimensional array.
type DFVector <: AbstractArray{AbstractDataFrame,1}
    df::AbstractDataFrame
end

# Iteration protocol (start/done/next). The iteration state is the 1-based row
# number; iteration stops once it exceeds the number of rows.
function start(itr::DFVector)
    return 1
end

function done(itr::DFVector, i::Int)
    return i > nrow(itr.df)
end

# Yields row `i` as `itr.df[i, :]` (unwrapped), together with the next state.
# Note: scalar `getindex` wraps the same row in an IX, whereas iteration does
# not.
function next(itr::DFVector, i::Int)
    row = itr.df[i, :]
    return (row, i + 1)
end

# One element per row of the wrapped DataFrame.
function length(itr::DFVector)
    return nrow(itr.df)
end

function size(itr::DFVector)
    return (nrow(itr.df),)
end
# Select several rows at once; the result is a DFVector over the selected rows.
#
# `i` may be any vector whose element type is an Integer subtype: Vector{Int},
# integer ranges, and vectors declared with element type Integer all match.
# The method is parametric because type parameters are invariant: Vector{Int}
# is NOT a subtype of AbstractVector{Integer}, so a signature spelled with
# AbstractVector{Integer} never matches ordinary integer index vectors.
function getindex{T<:Integer}(itr::DFVector, i::AbstractVector{T})
    return DFVector(itr.df[i, :])
end

# Logical row selection (Vector{Bool}, BitVector). Bool <: Integer, so the
# parametric method above would also accept these; the explicit method keeps
# the most specific match unambiguous for boolean masks.
# NOTE(review): bit arrays with more than one dimension are no longer accepted
# here; they are not meaningful row selectors -- confirm nothing relies on it.
function getindex(itr::DFVector, i::AbstractVector{Bool})
    return DFVector(itr.df[i, :])
end

# Select a single row, returned wrapped in an IX key.
function getindex(itr::DFVector, i::Int)
    return IX(itr.df[i, :])
end
# Apply `f` to every element produced by iterating `dfri` and collect the
# results with a comprehension.
function map(f::Function, dfri::DFVector)
    return [f(row) for row in dfri]
end
# Return the DFVector unchanged when DataFrames asks to "upgrade" it.
# NOTE(review): presumably this stops DataFrames from converting the column
# into one of its own vector types -- confirm against DataFrames internals.
function DataFrames.upgrade_vector(v::DFVector)
    return v
end
# Print the key values from row 1 of the wrapped DataFrame, each followed by a
# single space (so the output ends with a trailing space).
function show(io::IO, x::IX)
    for col in 1:ncol(x.df)
        show(io, x.df[1, col])
        print(io, " ")
    end
end

# Print one element per line, using scalar indexing (and therefore the IX
# display) for each row.
function show(io::IO, x::DFVector)
    for k in 1:length(x)
        show(io, x[k])
        println(io)
    end
end
# `similar` on a DFVector allocates a plain, uninitialized Array with the
# requested element type and dimensions; `x` itself is not consulted.
function similar(x::DFVector, T, dims)
    return Array(T, dims)
end
# Lexicographic "less than" for two keys.
#
# Columns are compared left to right using the values in row 1 of each wrapped
# DataFrame. The first column where the values differ decides the result. Only
# the columns both keys have in common are examined, which matches how `==`
# for IX treats keys of different widths; if all shared columns are equal the
# result is `false`.
#
# Values are compared one scalar pair at a time, so no intermediate comparison
# DataFrames are built and the loop stops at the first differing column.
function isless(x1::IX, x2::IX)
    for i in 1:min(ncol(x1.df), ncol(x2.df))
        a = x1.df[1, i]
        b = x2.df[1, i]
        if a < b
            return true
        elseif a > b
            return false
        end
    end
    return false
end
# Prefix equality for keys: only the columns present in BOTH keys are compared
# (row 1 of each wrapped DataFrame). A shorter key therefore equals any longer
# key that agrees with it on the shared leading columns.
function (==)(x1::IX, x2::IX)
    shared = min(ncol(x1.df), ncol(x2.df))
    col = 1
    while col <= shared
        if x1.df[1, col] != x2.df[1, col]
            return false
        end
        col += 1
    end
    return true
end
# ---- Demo ----
# Seed the random number generator so the sampled columns are reproducible.
srand(10)
N = 10
# Build an N-row table: three label columns (a, b, c) sampled from slices of
# `letters` / `LETTERS`, plus two columns of random numbers (x, y).
# NOTE(review): `letters` / `LETTERS` are not defined in this file; presumably
# they are the lower- and upper-case alphabets provided by DataFrames -- confirm.
d = DataFrame(
a = letters[rand(10:11, N)],
b = LETTERS[rand(6:7, N)],
c = letters[rand(23:26, N)],
x = rand(N),
y = rand(N)
)
# Add a column "ix" holding, for each row, a key made of the first three
# columns (a DFVector over d[:, 1:3], wrapped in an IndexedVector).
d["ix"] = IndexedVector(DFVector(d[:,[1:3]]))
# Select rows by key. `==` on IX compares only the columns both keys share, so
# a one- or two-value key matches every row that agrees on those leading
# columns; the three-value key must agree on all three.
d[d["ix"] .== IX("j"), :]
d[d["ix"] .== IX("j", "G"), :]
d[d["ix"] .== IX("j", "G", "y"), :]
@tshort
Copy link
Author

tshort commented Apr 10, 2013

Note: you need a fairly recent version of Julia and DataFrames for this code to work.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment