Skip to content

Instantly share code, notes, and snippets.

@tk3369
Created September 13, 2018 07:01
Show Gist options
  • Save tk3369/822fcf520295273152a5e5ddb9a3a6a9 to your computer and use it in GitHub Desktop.
Save tk3369/822fcf520295273152a5e5ddb9a3a6a9 to your computer and use it in GitHub Desktop.
Fast CSV reader in Julia
"""
    foo2(filename, parsers; headers=true, delimiter=",", nrows=0)

Read the delimited text file `filename` and return its contents as a `DataFrame`.

Each line is split on `delimiter`, and the i-th field is converted by calling the
i-th element of `parsers` on it. Fields and parsers are paired with `zip`, so any
surplus fields (or surplus parsers) on a line are ignored.

`DataFrame` must already be in scope where this function is defined
(presumably via `using DataFrames` -- confirm against the enclosing file).

# Arguments
- `filename`: path of the file to read.
- `parsers`: collection of callables, one per column, each taking a field string.

# Keywords
- `headers`: when `true`, the first line supplies the column names; when `false`,
  columns are named `_1`, `_2`, ... and every line is treated as data.
- `delimiter`: field separator passed to `split`.
- `nrows`: expected number of data rows, used only as a capacity hint;
  `0` means unknown. The file may contain more or fewer rows than this.

# Returns
A `DataFrame` with one column per header field (or, without headers, one column
per field of the first parsed row).
"""
function foo2(filename, parsers; headers=true, delimiter=",", nrows=0)
    return open(filename) do f
        # The header line has to be consumed before any data line is read.
        hdr = headers ? split(readline(f), delimiter) : SubString{String}[]

        # Rows are always appended; `nrows` only reserves capacity, so a
        # mismatch with the real row count can neither leave unassigned slots
        # nor index past the end of the buffer.
        lines = []
        nrows > 0 && sizehint!(lines, nrows)
        for line in eachline(f)
            row = map(x -> x[1](x[2]), zip(parsers, split(line, delimiter)))
            push!(lines, row)
        end

        # Without a header the column count comes from the data itself,
        # falling back to the number of parsers when the file has no rows.
        c = if headers
            length(hdr)
        elseif isempty(lines)
            length(parsers)
        else
            length(first(lines))
        end
        names = headers ? hdr : ["_$i" for i in 1:c]

        # Transpose the row-wise buffer into one vector per column.
        DataFrame([[L[j] for L in lines] for j in 1:c], Symbol.(names))
    end
end
julia> @btime foo2("random1000_1000.csv", $parsers; nrows=1000);
591.724 ms (3021860 allocations: 114.26 MiB)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment