Last active
October 1, 2018 23:57
-
-
Save simonbyrne/fb33de343bf870fd670313a118ca104e to your computer and use it in GitHub Desktop.
CSV benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Random, DataFrames, CSV, CSVFiles, Pandas, CSVReader, TextParse, RCall | |
R"library(data.table)" | |
"""
    writedata(n, path="data.csv")

Write a synthetic CSV file with `n` rows to `path` for benchmarking CSV readers.

Columns `a`, `b` and `c` are sampled from `0:1e6` and divided by 100, so each value
has only a few decimal places: we are trying to avoid triggering slow paths in the
parsers. Column `d` holds random 10-character strings.

Returns whatever `CSV.write` returns.
"""
function writedata(n, path="data.csv")
    df = DataFrames.DataFrame(
        a=rand(0:1e6, n) ./ 100,
        b=rand(0:1e6, n) ./ 100,
        c=rand(0:1e6, n) ./ 100,
        d=[randstring(10) for _ in 1:n],
    )
    return CSV.write(path, df)
end
# --- Small file: 10,000 rows, read by every parser -------------------------------
writedata(10_000)

# Precompile: the first call to each reader also pays for compilation,
# so these timings are warm-up only.
@time CSV.read("data.csv"); # CSV
@time DataFrames.DataFrame(CSVFiles.load("data.csv")); # CSVFiles
@time Pandas.read_csv("data.csv"); # Pandas
@time CSVReader.read_csv("data.csv"); # CSVReader
@time TextParse.csvread("data.csv", pooledstrings=false, type_detect_rows=100); # TextParse
@time R"fread('data.csv')"; # data.table

# Second pass over the same small file: the readers are now compiled.
@time CSV.read("data.csv"); # CSV
@time DataFrames.DataFrame(CSVFiles.load("data.csv")); # CSVFiles
@time Pandas.read_csv("data.csv"); # Pandas
@time CSVReader.read_csv("data.csv"); # CSVReader
@time TextParse.csvread("data.csv", pooledstrings=false, type_detect_rows=100); # TextParse
@time R"fread('data.csv')"; # data.table

# --- Large file: 10,000,000 rows --------------------------------------------------
# Only CSV, Pandas and data.table are timed at this size.
writedata(10_000_000)
@time CSV.read("data.csv"); # CSV
@time Pandas.read_csv("data.csv"); # Pandas
@time R"fread('data.csv')"; # data.table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[[AxisArrays]] | |
deps = ["Compat", "Dates", "IntervalSets", "IterTools", "Pkg", "Random", "RangeArrays", "Test"] | |
git-tree-sha1 = "2e2536e9e6f27c4f8d09d8442b61a7ae0b910c28" | |
uuid = "39de3d68-74b9-583c-8d2d-e117c070f3a9" | |
version = "0.3.0" | |
[[Base64]] | |
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" | |
[[BinaryProvider]] | |
deps = ["Libdl", "Pkg", "SHA", "Test"] | |
git-tree-sha1 = "48c147e63431adbcee69bc40b04c3f0fec0a4982" | |
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232" | |
version = "0.5.0" | |
[[CSV]] | |
deps = ["CategoricalArrays", "DataFrames", "DataStreams", "Dates", "Mmap", "Parsers", "Pkg", "Profile", "Random", "Tables", "Test", "Unicode", "WeakRefStrings"] | |
git-tree-sha1 = "da83cb359d838758adf057719cdbfeffb074aabe" | |
uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" | |
version = "0.4.1" | |
[[CSVFiles]] | |
deps = ["DataValues", "FileIO", "HTTP", "IterableTables", "IteratorInterfaceExtensions", "TableShowUtils", "TableTraits", "TableTraitsUtils", "Test", "TextParse"] | |
git-tree-sha1 = "b39c9d94d944ab5c7ee9d7503509a63ea21d564c" | |
uuid = "5d742f6a-9f54-50ce-8119-2520741973ca" | |
version = "0.9.1" | |
[[CSVReader]] | |
deps = ["DataFrames", "InternedStrings", "Parsers"] | |
git-tree-sha1 = "2ca3695def16c1ba3f8b6cca23f4c1c8364de478" | |
repo-rev = "master" | |
repo-url = "https://github.com/tk3369/CSVReader.jl" | |
uuid = "6320db66-f659-5b09-9a97-e9f7ce0d36e4" | |
version = "0.1.0" | |
[[CategoricalArrays]] | |
deps = ["Compat", "Future", "JSON", "Missings", "Printf", "Reexport"] | |
git-tree-sha1 = "6362c49130b5888f5628bc197ee5f17aec7d2a88" | |
uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597" | |
version = "0.4.0" | |
[[CodecZlib]] | |
deps = ["BinaryProvider", "Libdl", "Pkg", "Test", "TranscodingStreams"] | |
git-tree-sha1 = "83cb3d65c37ea1364c2d5bf7bcea41843ba645dc" | |
uuid = "944b1d66-785c-5afd-91f1-9de20f533193" | |
version = "0.5.0" | |
[[Compat]] | |
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] | |
git-tree-sha1 = "ff2595695fc4f14427358ce2593f867085c45dcb" | |
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" | |
version = "1.2.0" | |
[[Conda]] | |
deps = ["Compat", "JSON", "VersionParsing"] | |
git-tree-sha1 = "85b5bf3ffcf4f39abe019dab1dd00a0aead8d882" | |
uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d" | |
version = "1.0.2" | |
[[DataFrames]] | |
deps = ["CategoricalArrays", "CodecZlib", "Compat", "DataStreams", "Dates", "InteractiveUtils", "IteratorInterfaceExtensions", "LinearAlgebra", "Missings", "Pkg", "Printf", "Random", "Reexport", "SortingAlgorithms", "Statistics", "StatsBase", "TableTraits", "Tables", "Test", "TranscodingStreams", "Unicode", "WeakRefStrings"] | |
git-tree-sha1 = "0fcb0c9914f31e0607b1965dc5a9e15c969c4806" | |
uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" | |
version = "0.14.0" | |
[[DataStreams]] | |
deps = ["Dates", "Missings", "Pkg", "Test", "WeakRefStrings"] | |
git-tree-sha1 = "69c72a1beb4fc79490c361635664e13c8e4a9548" | |
uuid = "9a8bc11e-79be-5b39-94d7-1ccc349a1a85" | |
version = "0.4.1" | |
[[DataStructures]] | |
deps = ["InteractiveUtils", "OrderedCollections", "REPL", "Random", "Serialization", "Test"] | |
git-tree-sha1 = "8fc6e166e24fda04b2b648d4260cdad241788c54" | |
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" | |
version = "0.14.0" | |
[[DataValues]] | |
deps = ["Dates", "InteractiveUtils", "LinearAlgebra", "Random", "Test"] | |
git-tree-sha1 = "4fedccda7e5111354c7dcc832c7da83ff7258765" | |
uuid = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" | |
version = "0.4.5" | |
[[Dates]] | |
deps = ["Printf"] | |
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" | |
[[DelimitedFiles]] | |
deps = ["Mmap"] | |
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" | |
[[Distributed]] | |
deps = ["LinearAlgebra", "Random", "Serialization", "Sockets"] | |
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" | |
[[FileIO]] | |
deps = ["Pkg", "Random", "Test"] | |
git-tree-sha1 = "b80161b7e679a1241f9441ebfa60b62d4239cf99" | |
uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" | |
version = "1.0.1" | |
[[Future]] | |
deps = ["Random"] | |
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" | |
[[HTTP]] | |
deps = ["Base64", "Dates", "Distributed", "IniFile", "MbedTLS", "Sockets", "Test"] | |
git-tree-sha1 = "b881f69331e85642be315c63d05ed65d6fc8a05b" | |
uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" | |
version = "0.7.1" | |
[[IniFile]] | |
deps = ["Test"] | |
git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8" | |
uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f" | |
version = "0.5.0" | |
[[InteractiveUtils]] | |
deps = ["LinearAlgebra", "Markdown"] | |
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" | |
[[InternedStrings]] | |
deps = ["Random", "Test"] | |
git-tree-sha1 = "eb05b5625bc5d821b8075a77e4c421933e20c76b" | |
uuid = "7d512f48-7fb1-5a58-b986-67e6dc259f01" | |
version = "0.7.0" | |
[[IntervalSets]] | |
deps = ["Compat"] | |
git-tree-sha1 = "bf1c727a12bbe0beb4888d439ee4e91b9ba7944a" | |
uuid = "8197267c-284f-5f27-9208-e0e47529a953" | |
version = "0.3.0" | |
[[IterTools]] | |
deps = ["Pkg", "SparseArrays", "Test"] | |
git-tree-sha1 = "ed0787e62dc46b8d8c7c3db54391d71e0da5fefd" | |
uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" | |
version = "1.0.0" | |
[[IterableTables]] | |
deps = ["DataValues", "IteratorInterfaceExtensions", "Requires", "TableTraits", "TableTraitsUtils", "Test"] | |
git-tree-sha1 = "486612943cd16ebb7fcc5b0a4dc2c80e8b9c7dbc" | |
uuid = "1c8ee90f-4401-5389-894e-7a04a3dc0f4d" | |
version = "0.9.0" | |
[[IteratorInterfaceExtensions]] | |
deps = ["Test"] | |
git-tree-sha1 = "5484e5ede2a4137b9643f4d646e8e7b87b794415" | |
uuid = "82899510-4779-5014-852e-03e436cf321d" | |
version = "0.1.1" | |
[[JSON]] | |
deps = ["Dates", "Distributed", "Mmap", "Pkg", "Sockets", "Test", "Unicode"] | |
git-tree-sha1 = "fec8e4d433072731466d37ed0061b3ba7f70eeb9" | |
uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" | |
version = "0.19.0" | |
[[Lazy]] | |
deps = ["Compat", "MacroTools", "Test"] | |
git-tree-sha1 = "1c2c5566f0eeaaad6979c156562384458f966e6a" | |
uuid = "50d2b5c4-7a5e-59d5-8109-a42b560f39c0" | |
version = "0.13.1" | |
[[LibGit2]] | |
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" | |
[[Libdl]] | |
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" | |
[[LinearAlgebra]] | |
deps = ["Libdl"] | |
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" | |
[[Logging]] | |
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" | |
[[MacroTools]] | |
deps = ["Compat"] | |
git-tree-sha1 = "c443e1c8d58a4e9f61b708ad0a88286c7042145b" | |
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" | |
version = "0.4.4" | |
[[Markdown]] | |
deps = ["Base64"] | |
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" | |
[[MbedTLS]] | |
deps = ["BinaryProvider", "Libdl", "Pkg", "Random", "Sockets", "Test"] | |
git-tree-sha1 = "3775d205b09b624aa06d39012a8920ba99cb3b8b" | |
uuid = "739be429-bea8-5141-9913-cc70e7f3736d" | |
version = "0.6.3" | |
[[Missings]] | |
deps = ["Dates", "InteractiveUtils", "SparseArrays", "Test"] | |
git-tree-sha1 = "adc26d2ee85a49c413464110d922cf21efc9d233" | |
uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" | |
version = "0.3.1" | |
[[Mmap]] | |
uuid = "a63ad114-7e13-5084-954f-fe012c677804" | |
[[Nullables]] | |
deps = ["Compat", "Pkg"] | |
git-tree-sha1 = "ae1a63457e14554df2159b0b028f48536125092d" | |
uuid = "4d1e1d77-625e-5b40-9113-a560ec7a8ecd" | |
version = "0.0.8" | |
[[OrderedCollections]] | |
deps = ["Pkg", "Random", "Serialization", "Test"] | |
git-tree-sha1 = "85619a3f3e17bb4761fe1b1fd47f0e979f964d5b" | |
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" | |
version = "1.0.2" | |
[[Pandas]] | |
deps = ["Compat", "DataValues", "IteratorInterfaceExtensions", "Lazy", "Pkg", "PyCall", "Statistics", "TableTraits", "TableTraitsUtils", "Test"] | |
git-tree-sha1 = "a6b6f02de94029e72d8bec68c5413fa80eb1156f" | |
uuid = "eadc2687-ae89-51f9-a5d9-86b5a6373a9c" | |
version = "1.0.2" | |
[[Parsers]] | |
deps = ["Dates", "Mmap", "Pkg", "Test"] | |
git-tree-sha1 = "d5252e3f228a513b9947585e95b94d146b7d66e4" | |
uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" | |
version = "0.2.7" | |
[[Pkg]] | |
deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] | |
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" | |
[[PooledArrays]] | |
deps = ["Test"] | |
git-tree-sha1 = "5c5ded7adc52867f599c21d3f43542fce491afda" | |
uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" | |
version = "0.4.1" | |
[[Printf]] | |
deps = ["Unicode"] | |
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" | |
[[Profile]] | |
deps = ["Printf"] | |
uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" | |
[[PyCall]] | |
deps = ["Compat", "Conda", "MacroTools", "Statistics", "VersionParsing"] | |
git-tree-sha1 = "f56428481fd0caf01cc8ecd2a0892fdaf8fddd50" | |
uuid = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" | |
version = "1.18.4" | |
[[RCall]] | |
deps = ["AxisArrays", "CategoricalArrays", "Conda", "DataFrames", "DataStructures", "Dates", "Libdl", "Missings", "Pkg", "REPL", "Random", "Requires", "StatsModels", "Test", "WinReg"] | |
git-tree-sha1 = "fe763209d3be186abfa4a8003b6678889b2ff679" | |
uuid = "6f49c342-dc21-5d91-9882-a32aef131414" | |
version = "0.12.1" | |
[[REPL]] | |
deps = ["InteractiveUtils", "Markdown", "Sockets"] | |
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" | |
[[Random]] | |
deps = ["Serialization"] | |
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" | |
[[RangeArrays]] | |
deps = ["Compat"] | |
git-tree-sha1 = "d925adfd5b01cb46fde89dc9548d167b3b136f4a" | |
uuid = "b3c3ace0-ae52-54e7-9d0b-2c1406fd6b9d" | |
version = "0.3.1" | |
[[Reexport]] | |
deps = ["Pkg"] | |
git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0" | |
uuid = "189a3867-3050-52da-a836-e630ba90ab69" | |
version = "0.2.0" | |
[[Requires]] | |
deps = ["Test"] | |
git-tree-sha1 = "f6fbf4ba64d295e146e49e021207993b6b48c7d1" | |
uuid = "ae029012-a4dd-5104-9daa-d747884805df" | |
version = "0.5.2" | |
[[SHA]] | |
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" | |
[[Serialization]] | |
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" | |
[[SharedArrays]] | |
deps = ["Distributed", "Mmap", "Random", "Serialization"] | |
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" | |
[[Sockets]] | |
uuid = "6462fe0b-24de-5631-8697-dd941f90decc" | |
[[SortingAlgorithms]] | |
deps = ["DataStructures", "Random", "Test"] | |
git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd" | |
uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" | |
version = "0.3.1" | |
[[SparseArrays]] | |
deps = ["LinearAlgebra", "Random"] | |
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" | |
[[Statistics]] | |
deps = ["LinearAlgebra", "SparseArrays"] | |
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" | |
[[StatsBase]] | |
deps = ["DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "Test"] | |
git-tree-sha1 = "723193a13e8078cec6dcd0b8fe245c8bfd81690e" | |
uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" | |
version = "0.25.0" | |
[[StatsModels]] | |
deps = ["Compat", "DataFrames", "Pkg", "StatsBase", "Test"] | |
git-tree-sha1 = "8af0d8dbcdee59daa386167ee0cf0278e5c44263" | |
uuid = "3eaba693-59b7-5ba5-a881-562e759f1c8d" | |
version = "0.3.1" | |
[[TableShowUtils]] | |
deps = ["DataValues", "Dates", "JSON", "Markdown", "Test"] | |
git-tree-sha1 = "7295e0ed103c41e71e0a893685090816527350ec" | |
uuid = "5e66a065-1f0a-5976-b372-e0b8c017ca10" | |
version = "0.2.0" | |
[[TableTraits]] | |
deps = ["IteratorInterfaceExtensions", "Test"] | |
git-tree-sha1 = "afee1fb3bc99c28eb4533ff0f22e33f6effcec18" | |
uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" | |
version = "0.3.1" | |
[[TableTraitsUtils]] | |
deps = ["DataValues", "IteratorInterfaceExtensions", "Missings", "Pkg", "TableTraits", "Test"] | |
git-tree-sha1 = "a355f1882d64881a11f853e64dcc353975c4df6e" | |
uuid = "382cd787-c1b6-5bf2-a167-d5b971a19bda" | |
version = "0.3.1" | |
[[Tables]] | |
deps = ["Pkg", "Requires", "Test"] | |
git-tree-sha1 = "277464179bc7cfb1b4d5a4f3ccde0fc75792157f" | |
uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" | |
version = "0.1.8" | |
[[Test]] | |
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] | |
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" | |
[[TextParse]] | |
deps = ["CodecZlib", "Compat", "DataStructures", "Dates", "Mmap", "Nullables", "PooledArrays", "Test", "WeakRefStrings"] | |
git-tree-sha1 = "f33529861ce1126edb9c0160243d5e67888bc5cc" | |
uuid = "e0df1984-e451-5cb5-8b61-797a481e67e3" | |
version = "0.6.0" | |
[[TranscodingStreams]] | |
deps = ["DelimitedFiles", "Pkg", "Random", "Test"] | |
git-tree-sha1 = "a34a2d588e2d2825602bf14a24216d5c8b0921ec" | |
uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" | |
version = "0.8.1" | |
[[UUIDs]] | |
deps = ["Random"] | |
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" | |
[[Unicode]] | |
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" | |
[[VersionParsing]] | |
deps = ["Compat"] | |
git-tree-sha1 = "c9d5aa108588b978bd859554660c8a5c4f2f7669" | |
uuid = "81def892-9a0e-5fdd-b105-ffc91e053289" | |
version = "1.1.3" | |
[[WeakRefStrings]] | |
deps = ["Missings", "Random", "Test"] | |
git-tree-sha1 = "1087e8be380f2c8b96434b02bb1150fc1c511135" | |
uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" | |
version = "0.5.3" | |
[[WinReg]] | |
deps = ["Test"] | |
git-tree-sha1 = "808380e0a0483e134081cc54150be4177959b5f4" | |
uuid = "1b915085-20d7-51cf-bf83-8f477d6f5128" | |
version = "0.3.1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[deps] | |
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" | |
CSVFiles = "5d742f6a-9f54-50ce-8119-2520741973ca" | |
CSVReader = "6320db66-f659-5b09-9a97-e9f7ce0d36e4" | |
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" | |
Pandas = "eadc2687-ae89-51f9-a5d9-86b5a6373a9c" | |
RCall = "6f49c342-dc21-5d91-9882-a32aef131414" | |
TextParse = "e0df1984-e451-5cb5-8b61-797a481e67e3" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment