Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Julia serialization is dog-slow
using JDF
using CSV, DataFrames
using Serialization:serialize,deserialize
using BufferedStreams
# Load the pipe-delimited, header-less performance file and time the parse.
@time a = CSV.read("C:/Users/ZJ.DAI/Documents/git/format-wars/data/Performance_2016Q4.txt", delim = '|', header = false);

# Time writing `a` with the stdlib serializer through a buffered output stream.
# try/finally guarantees the stream is closed even if `serialize` throws.
io = BufferedOutputStream(open("c:/data/bin.bin","w"))
try
    @time serialize(io, a)
finally
    close(io)
end

# Time reading the table back through a buffered input stream. The value of the
# `try` expression is assigned at top level so `a` is rebound as a global.
io = BufferedInputStream(open("c:/data/bin.bin","r"))
a = try
    @time deserialize(io)
finally
    close(io)
end
using Blosc
"""
    compress(a)

Return a representation of column `a` to be serialized.

Vectors whose element type is `Union{Missing, T}` and vectors of `String` are
returned unchanged; every other input is passed to `Blosc.compress`.
"""
function compress(a::Vector{Union{Missing, T}}) where {T}
    # Columns that may contain `missing` are passed through untouched.
    return a
end

function compress(a::Vector{String})
    # String columns are passed through untouched.
    return a
end

function compress(a)
    # Fallback for everything else: delegate to Blosc.
    return Blosc.compress(a)
end
# Run every column of `a` through `compress` and collect the results.
# The loop variable is named `col` so it no longer shadows the table `a`.
ac = Vector[compress(Array(col)) for col in eachcol(a)]

# Time serializing the per-column vectors to a plain file handle. The do-block
# form closes the file even if `serialize` throws.
open("c:/data/bin2.bin","w") do io
    @time serialize(io,ac)
end
using Serialization, DataFrames, Blosc
# Time reading the per-column vectors back. The do-block form closes the file
# even if `deserialize` throws, and its return value is bound to `ac`.
ac = open("c:/data/bin2.bin","r") do io
    @time deserialize(io)
end
# Drop every column whose element type is `String` or `Union{Missing, String}`.
# The original passed an undefined name (`inds`) to `Not`; the column to remove
# is the loop variable `n`.
for n in names(a)
    # `a[!, n]` reads the column without copying it just to inspect its eltype.
    if eltype(a[!, n]) in (String, Union{Missing, String})
        select!(a, Not(n))
    end
end

# Time writing the remaining columns back out as CSV.
@time CSV.write("c:/data/a.csv", a)
using CSV, JDF
# Reload the CSV written earlier and time the parse.
@time a = CSV.read("c:/data/a.csv");

# Save the table with JDF first: `savejdf` returns the value `m` that is
# serialized below, so it must run before `serialize(io, m)`.
@time m = savejdf(a, "c:/data/a.jdf")

# Persist `m` next to the data. try/finally closes the stream even if
# `serialize` throws.
@time io = BufferedOutputStream(open("c:/data/a.meta", "w"))
try
    @time serialize(io, m)
finally
    close(io)
end
using Revise
using FileIO, JDF, BufferedStreams, Serialization
# Read back the value `m` that was serialized to a.meta. The value of the `try`
# expression is assigned at top level so `m` is rebound as a global, and the
# stream is closed even if `deserialize` throws.
@time io = BufferedInputStream(open("c:/data/a.meta", "r"))
m = try
    @time deserialize(io)
finally
    close(io)
end

# Load from the same path `savejdf` wrote to ("a.jdf"); the original pointed at
# "a.jld2", which this script never creates.
@time bb = loadjdf("c:/data/a.jdf", m)

# Presumably a leftover sanity check from the interactive session; it has no effect.
2+2
@gzhang8

This comment has been minimized.

Copy link

@gzhang8 gzhang8 commented Apr 11, 2020

Hello. I ran into a similar problem. Did you find a solution to make Serialization faster?

@xiaodaigh

This comment has been minimized.

Copy link
Owner Author

@xiaodaigh xiaodaigh commented Apr 11, 2020

No. But JDF.jl is kinda fast

@gzhang8

This comment has been minimized.

Copy link

@gzhang8 gzhang8 commented Apr 12, 2020

I see. Thanks

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.