Last active
July 16, 2018 22:32
-
-
Save crusaderky/89819258ff960d06136d45526f7d05db to your computer and use it in GitHub Desktop.
to_csv benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.DataArray 'mul-a39f522da57bff7c1e435f9a642d1171' (r: 25000, c: 750)>\n", | |
"dask.array<shape=(25000, 750), dtype=float64, chunksize=(3125, 750)>\n", | |
"Coordinates:\n", | |
" * r (r) <U9 'row 0' 'row 1' 'row 2' 'row 3' 'row 4' 'row 5' 'row 6' ...\n", | |
" * c (c) <U7 'col 0' 'col 1' 'col 2' 'col 3' 'col 4' 'col 5' 'col 6' ..." | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import xarray\n", | |
"import dask.array as da\n", | |
"from xarray_extras.csv import to_csv\n", | |
"\n", | |
"# Production sizes\n", | |
"# ROWS = 500000\n", | |
"# Quick test\n", | |
"ROWS = 25000\n", | |
"\n", | |
"a = xarray.DataArray(da.random.random((ROWS, 750), chunks=(ROWS // 8, -1)) * 1e6,\n", | |
" dims=['r', 'c'],\n", | |
" coords={'r': ['row %d' % i for i in range(ROWS)],\n", | |
" 'c': ['col %d' % i for i in range(750)]})\n", | |
"a" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"!rm -rf /dev/shm/v1.csv.gz /dev/shm/v2\n", | |
"!mkdir -p /dev/shm/v2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 6 ms, sys: 5 ms, total: 11 ms\n", | |
"Wall time: 10.9 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"future1 = to_csv(a, '/dev/shm/v1.csv.gz', compression='gzip', float_format='%.2f')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 205 ms, sys: 2.89 s, total: 3.1 s\n", | |
"Wall time: 10 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"future1.compute()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 5.72 s, sys: 0 ns, total: 5.72 s\n", | |
"Wall time: 5.77 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"future2 = a.to_dataset('c').to_dask_dataframe().to_csv(\n", | |
" '/dev/shm/v2', compression='gzip', float_format='%.2f', index=False, compute=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 2min 7s, sys: 18.4 s, total: 2min 25s\n", | |
"Wall time: 1min 57s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"import dask\n", | |
"dask.compute(*future2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment