Created
March 26, 2019 19:55
-
-
Save davidanthoff/233b52f05ee9222d0d904863566b1d7b to your computer and use it in GitHub Desktop.
CSV perf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"using CSVFiles, DataFrames, TableReader, TextParse" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CSVFiles.jl\n", | |
" 0.129145 seconds (649.75 k allocations: 58.259 MiB, 62.85% gc time)\n", | |
"TableReader.jl\n", | |
" 0.178718 seconds (583 allocations: 8.811 MiB)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Untimed first call of each reader (presumably so that first-call\n", | |
"# compilation stays out of the timings below).\n", | |
"csvread(\"diamonds.csv\");\n", | |
"readcsv(\"diamonds.csv\");\n", | |
"\n", | |
"# Timed csvread run, preceded by two explicit GC passes.\n", | |
"GC.gc()\n", | |
"GC.gc()\n", | |
"println(\"CSVFiles.jl\")\n", | |
"@time csvread(\"diamonds.csv\");\n", | |
"\n", | |
"# Timed readcsv run, again preceded by two explicit GC passes.\n", | |
"GC.gc()\n", | |
"GC.gc()\n", | |
"println(\"TableReader.jl\")\n", | |
"@time readcsv(\"diamonds.csv\");" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CSVFiles.jl\n", | |
" 0.248445 seconds (5.57 M allocations: 507.156 MiB, 24.06% gc time)\n", | |
"TableReader.jl\n", | |
" 0.112455 seconds (246.44 k allocations: 48.231 MiB, 9.92% gc time)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Untimed first call of each reader (presumably so that first-call\n", | |
"# compilation stays out of the timings below).\n", | |
"csvread(\"flights14.csv\");\n", | |
"readcsv(\"flights14.csv\");\n", | |
"\n", | |
"# Timed csvread run, preceded by two explicit GC passes.\n", | |
"GC.gc()\n", | |
"GC.gc()\n", | |
"println(\"CSVFiles.jl\")\n", | |
"@time csvread(\"flights14.csv\");\n", | |
"\n", | |
"# Timed readcsv run, again preceded by two explicit GC passes.\n", | |
"GC.gc()\n", | |
"GC.gc()\n", | |
"println(\"TableReader.jl\")\n", | |
"@time readcsv(\"flights14.csv\");" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CSVFiles.jl\n", | |
" 24.814286 seconds (353.22 M allocations: 12.152 GiB, 16.97% gc time)\n", | |
"TableReader.jl\n", | |
" 35.728565 seconds (54.33 M allocations: 5.468 GiB, 8.01% gc time)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Reader options used for this file: csvread gets an explicit String parser\n", | |
"# for column 1, and readcsv is called with chunksize=0.\n", | |
"# NOTE(review): chunksize=0 presumably turns off TableReader's chunked\n", | |
"# reading -- confirm against the TableReader documentation.\n", | |
"\n", | |
"# Untimed first call of each reader (presumably so that first-call\n", | |
"# compilation stays out of the timings below).\n", | |
"csvread(\"parking-citations.csv\", colparsers=Dict(1=>String));\n", | |
"readcsv(\"parking-citations.csv\", chunksize=0);\n", | |
"\n", | |
"# Timed csvread run, preceded by two explicit GC passes.\n", | |
"GC.gc()\n", | |
"GC.gc()\n", | |
"println(\"CSVFiles.jl\")\n", | |
"@time csvread(\"parking-citations.csv\", colparsers=Dict(1=>String));\n", | |
"\n", | |
"# Timed readcsv run, again preceded by two explicit GC passes.\n", | |
"GC.gc()\n", | |
"GC.gc()\n", | |
"println(\"TableReader.jl\")\n", | |
"@time readcsv(\"parking-citations.csv\", chunksize=0);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CSVFiles.jl\n", | |
" 6.998413 seconds (43.92 M allocations: 2.020 GiB, 25.83% gc time)\n", | |
"TableReader.jl\n", | |
" 8.989041 seconds (34.17 M allocations: 1.859 GiB, 19.04% gc time)\n" | |
] | |
} | |
], | |
"source": [ | |
"# csvread is given an explicit String parser for column 24 on this file;\n", | |
"# readcsv is called with its defaults.\n", | |
"\n", | |
"# Untimed first call of each reader (presumably so that first-call\n", | |
"# compilation stays out of the timings below).\n", | |
"csvread(\"tmpnotab.csv\", colparsers=Dict(24=>String));\n", | |
"readcsv(\"tmpnotab.csv\");\n", | |
"\n", | |
"# Timed csvread run, preceded by two explicit GC passes.\n", | |
"GC.gc()\n", | |
"GC.gc()\n", | |
"println(\"CSVFiles.jl\")\n", | |
"@time csvread(\"tmpnotab.csv\", colparsers=Dict(24=>String));\n", | |
"\n", | |
"# Timed readcsv run, again preceded by two explicit GC passes.\n", | |
"GC.gc()\n", | |
"GC.gc()\n", | |
"println(\"TableReader.jl\")\n", | |
"@time readcsv(\"tmpnotab.csv\");" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CSVFiles.jl\n", | |
" 0.347390 seconds (1.69 M allocations: 126.447 MiB, 22.19% gc time)\n", | |
"TableReader.jl\n", | |
" 0.411119 seconds (1.52 M allocations: 103.196 MiB, 14.78% gc time)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Untimed first call of each reader (presumably so that first-call\n", | |
"# compilation stays out of the timings below).\n", | |
"csvread(\"winemag-data-130k-v2.csv\");\n", | |
"readcsv(\"winemag-data-130k-v2.csv\");\n", | |
"\n", | |
"# Timed csvread run, preceded by two explicit GC passes.\n", | |
"GC.gc()\n", | |
"GC.gc()\n", | |
"println(\"CSVFiles.jl\")\n", | |
"@time csvread(\"winemag-data-130k-v2.csv\");\n", | |
"\n", | |
"# Timed readcsv run, again preceded by two explicit GC passes.\n", | |
"GC.gc()\n", | |
"GC.gc()\n", | |
"println(\"TableReader.jl\")\n", | |
"@time readcsv(\"winemag-data-130k-v2.csv\");" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "TableReader.ReadError", | |
"evalue": "TableReader.ReadError: invalid file format at line 4513, column 7 (found '\\t')", | |
"output_type": "error", | |
"traceback": [ | |
"TableReader.ReadError: invalid file format at line 4513, column 7 (found '\\t')", | |
"", | |
"Stacktrace:", | |
" [1] scanline!(::Array{TableReader.Token,2}, ::Int64, ::TranscodingStreams.Memory, ::Int64, ::Int64, ::Int64, ::TableReader.ParserParameters) at C:\\Users\\david\\.julia\\packages\\TableReader\\c33JW\\src\\tokenizer.jl:1046", | |
" [2] readdlm_internal(::TranscodingStreams.TranscodingStream{TranscodingStreams.Noop,IOStream}, ::TableReader.ParserParameters) at C:\\Users\\david\\.julia\\packages\\TableReader\\c33JW\\src\\TableReader.jl:405", | |
" [3] (::getfield(TableReader, Symbol(\"##14#19\")){TableReader.ParserParameters})(::IOStream) at C:\\Users\\david\\.julia\\packages\\TableReader\\c33JW\\src\\TableReader.jl:250", | |
" [4] #open#310(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::getfield(TableReader, Symbol(\"##14#19\")){TableReader.ParserParameters}, ::String) at .\\iostream.jl:369", | |
" [5] open at .\\iostream.jl:367 [inlined]", | |
" [6] #readcsv#12(::Char, ::Char, ::Bool, ::Bool, ::Int64, ::Bool, ::Nothing, ::Bool, ::Int64, ::typeof(readcsv), ::String) at C:\\Users\\david\\.julia\\packages\\TableReader\\c33JW\\src\\TableReader.jl:250", | |
" [7] readcsv(::String) at C:\\Users\\david\\.julia\\packages\\TableReader\\c33JW\\src\\TableReader.jl:241", | |
" [8] top-level scope at In[7]:2" | |
] | |
} | |
], | |
"source": [ | |
"# On this file readcsv throws TableReader.ReadError. The output recorded for\n", | |
"# this cell predates the guard below: it shows that uncaught error, raised by\n", | |
"# the untimed first readcsv call, which aborted the cell before any timing\n", | |
"# was printed. The helper catches only that error type, prints it, and lets\n", | |
"# the cell continue so the csvread timing is still produced; any other\n", | |
"# exception is rethrown unchanged.\n", | |
"function readcsv_or_report(path)\n", | |
"    try\n", | |
"        readcsv(path)\n", | |
"    catch err\n", | |
"        err isa TableReader.ReadError || rethrow()\n", | |
"        println(\"TableReader.jl failed: \", sprint(showerror, err))\n", | |
"        nothing\n", | |
"    end\n", | |
"end\n", | |
"\n", | |
"# Untimed first call of each reader (presumably so that first-call\n", | |
"# compilation stays out of the timings below). csvread is given an explicit\n", | |
"# String parser for column 24.\n", | |
"csvread(\"tmp0u3qt3mu.csv\", colparsers=Dict(24=>String));\n", | |
"readcsv_or_report(\"tmp0u3qt3mu.csv\");\n", | |
"\n", | |
"# Timed csvread run, preceded by two explicit GC passes.\n", | |
"GC.gc()\n", | |
"GC.gc()\n", | |
"println(\"CSVFiles.jl\")\n", | |
"@time csvread(\"tmp0u3qt3mu.csv\", colparsers=Dict(24=>String));\n", | |
"\n", | |
"# Timed readcsv attempt; if it fails, the time reported is that of the\n", | |
"# failed attempt and the error message is printed before it.\n", | |
"GC.gc()\n", | |
"GC.gc()\n", | |
"println(\"TableReader.jl\")\n", | |
"@time readcsv_or_report(\"tmp0u3qt3mu.csv\");" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Julia 1.1.0", | |
"language": "julia", | |
"name": "julia-1.1" | |
}, | |
"language_info": { | |
"file_extension": ".jl", | |
"mimetype": "application/julia", | |
"name": "julia", | |
"version": "1.1.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment