Skip to content

Instantly share code, notes, and snippets.

@ExpandingMan
Created October 4, 2016 16:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ExpandingMan/7c6e484e36c96a1869fd2b9309c75ef1 to your computer and use it in GitHub Desktop.
Save ExpandingMan/7c6e484e36c96a1869fd2b9309c75ef1 to your computer and use it in GitHub Desktop.
testing performance of Feather.jl
using DataFrames
using Feather
using PyCall
@pyimport feather as pyfeather
# using DatasToolbox
# Number of rows generated for each column of the benchmark DataFrame.
const NROWS = 2*10^6
# Path of the file written and read back by Feather.jl (relative to the working directory).
const FILENAME = "devtest1.feather"
# Path of the file written and read back by the Python `feather` package.
const PYTHON_FILENAME = "pythontest.feather"
"""
    pandas(df::DataFrame)::PyObject

Convert `df` into a pandas `DataFrame` and return it as a `PyObject`.

Every column of `df` is assigned to the pandas frame under `string(name)`.
Columns whose element type is `DateTime` are then re-assigned after calling
`astype("<M8[ns]")` on the pandas side, so they are stored with that NumPy
datetime dtype rather than as whatever the default conversion produced.
"""
function pandas(df::DataFrame)::PyObject
    # Use the `pyimport` function rather than the `@pyimport` macro here: the
    # macro is designed for top-level use (it defines a constant wrapper
    # module), which is wasteful and fragile inside a function body.
    pd = pyimport("pandas")
    # Request a raw PyObject so no automatic Python -> Julia conversion is attempted.
    pydf = pycall(pd["DataFrame"], PyObject)
    for col in names(df)
        key = string(col)
        column = df[col]
        set!(pydf, key, column)
        if eltype(column) == DateTime
            # Cast the freshly inserted column to the "<M8[ns]" dtype in place.
            set!(pydf, key, get(pydf, key)[:astype]("<M8[ns]"))
        end
    end
    return pydf
end
export pandas
"""
    make_test_df(dtypes::DataType...; nrows::Integer=NROWS)

Build a `DataFrame` with one generated column per entry of `dtypes`.

Column `idx` is named `Symbol(string(dtype) * string(idx))`, e.g. `:Float641`.
The contents depend on the requested type:

- `<: Real`: `rand(dtype, nrows)`.
- `<: AbstractString`: random strings of length 8 to 16.
- `<: Dates.TimeType`: `dtype(now()) + Dates.Day(i)` for `i in 1:nrows`, with
  `now()` sampled once per column so every row shares the same base instant.
- `Symbol`: random symbols of length 4 to 12.

# Keywords
- `nrows`: number of rows per column; defaults to the global `NROWS`.

# Throws
- `ArgumentError` if a requested type matches none of the cases above
  (previously such a column was silently omitted from the result).
"""
function make_test_df(dtypes::DataType...; nrows::Integer=NROWS)
    df = DataFrame()
    for (idx, dtype) in enumerate(dtypes)
        col = Symbol(string(dtype)*string(idx))
        if dtype <: Real
            df[col] = rand(dtype, nrows)
        elseif dtype <: AbstractString
            df[col] = [randstring(rand(8:16)) for i in 1:nrows]
        elseif dtype <: Dates.TimeType
            # Sample the clock once: it is loop-invariant, and a shared base
            # keeps consecutive rows exactly one day apart.
            start = dtype(now())
            df[col] = [start + Dates.Day(i) for i in 1:nrows]
        elseif dtype <: Symbol
            df[col] = [Symbol(randstring(rand(4:12))) for i in 1:nrows]
        else
            throw(ArgumentError("make_test_df: unsupported column type $dtype"))
        end
    end
    return df
end
# Benchmark driver: build one test DataFrame, then time writing and reading it
# as a Feather file, first with Feather.jl and then with the Python `feather`
# package through PyCall.
#
# The commented-out `df = ...` lines are alternative column mixes; the single
# uncommented assignment selects the data set that is actually benchmarked.
# df = DataFrame(rand(NROWS, 4))
# df = make_test_df(Float64, Float64, Float64)
df = make_test_df(Float64, Int64, String, DateTime)
# df = make_test_df(String, String, DateTime, Float64, String, String, String)
# df = make_test_df(Symbol, Symbol, DateTime, Float64, Symbol, Symbol, Symbol)
# df = make_test_df(Float64, Float64, Float64, Float64)
# df = make_test_df(Int64, Int64, Int64, Int64)
# df = make_test_df(String, String)
# df = make_test_df(Symbol, Symbol, Symbol, Symbol)
# df = make_test_df(DateTime, DateTime, DateTime, DateTime)
# NOTE(review): each `@time` below is the first call of that function in this
# script, so the Julia timings presumably include JIT compilation; re-run the
# timed lines to get steady-state numbers.
info("Serializing...")
@time Feather.write(FILENAME, df)
# Convert to a pandas DataFrame outside any timed region so that only the
# Python write itself is measured below.
pydf = pandas(df)
info("Serializing with Python...")
@time pyfeather.write_dataframe(pydf, PYTHON_FILENAME)
# Read back the files produced above; the results are kept in globals
# (`df_test`, `pydf_test`) so they can be inspected afterwards.
info("Deserializing...")
@time df_test = Feather.read(FILENAME)
info("Deserializing with Python...")
@time pydf_test = pyfeather.read_dataframe(PYTHON_FILENAME)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment