Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save andersy005/8f19f4fdb0feea253ee6784d6bd92199 to your computer and use it in GitHub Desktop.
Save andersy005/8f19f4fdb0feea253ee6784d6bd92199 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
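{
"cell_type": "markdown",
"metadata": {},
"source": [
"## NumPy and Dask Array memory leak\n",
"\n",
"This notebook compares the process's resident memory (RSS) before and after a threaded Dask `rechunk` + `sum` computation."
]
},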
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import dask.array as da\n",
"\n",
"# Array dimensions and chunk sizes are element counts, so spell them as\n",
"# integers instead of float literals such as 2e4.\n",
"N = 20_000   # side length of the square array\n",
"STRIP = 100  # thickness of one chunk strip\n",
"\n",
"# Rechunk from column strips to row strips and back again.\n",
"x = da.ones((N, N), chunks=(N, STRIP))\n",
"y = x.rechunk((STRIP, N))\n",
"z = y.rechunk((N, STRIP))"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import psutil\n",
"\n",
"# Process handle queried for memory usage in later cells.\n",
"proc = psutil.Process()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"84.28 MB\n"
]
}
],
"source": [
"# Prefer dask.utils.format_bytes so the notebook does not require the\n",
"# distributed package; fall back to distributed.utils for installations\n",
"# where dask.utils does not provide it.\n",
"try:\n",
"    from dask.utils import format_bytes\n",
"except ImportError:\n",
"    from distributed.utils import format_bytes\n",
"\n",
"# Resident set size before any computation has run.\n",
"print(format_bytes(proc.memory_info().rss))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3420"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of entries in the Dask graph behind z.\n",
"len(z.dask)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from dask.diagnostics import ProgressBar\n",
"\n",
"# Register the progress bar so the compute call below reports progress.\n",
"progress = ProgressBar()\n",
"progress.register()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[########################################] | 100% Completed | 6.0s\n",
"[########################################] | 100% Completed | 0.1s\n"
]
},
{
"data": {
"text/plain": [
"400000000.0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Scheduler comparison for this reduction:\n",
"#   scheduler='single-threaded' -> doesn't cause problems\n",
"#   scheduler='threads'         -> leaks around 500MB of memory\n",
"total = z.sum()\n",
"total.compute(scheduler='threads')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"605.26 MB\n"
]
}
],
"source": [
"# Resident set size once the threaded computation has finished.\n",
"rss_after = proc.memory_info().rss\n",
"print(format_bytes(rss_after))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" types | # objects | total size\n",
"=========================================== | =========== | ============\n",
" <class 'str | 60517 | 8.75 MB\n",
" <class 'dict | 11991 | 5.30 MB\n",
" <class 'code | 19697 | 2.72 MB\n",
" <class 'type | 2228 | 2.24 MB\n",
" <class 'tuple | 16142 | 1.04 MB\n",
" <class 'set | 2285 | 858.84 KB\n",
" <class 'list | 7284 | 738.09 KB\n",
" <class 'weakref | 4412 | 344.69 KB\n",
" <class 'abc.ABCMeta | 261 | 263.54 KB\n",
" function (__init__) | 1378 | 183.02 KB\n",
" <class 'traitlets.traitlets.MetaHasTraits | 180 | 175.45 KB\n",
" <class 'wrapper_descriptor | 2240 | 175.00 KB\n",
" <class 'int | 5584 | 168.92 KB\n",
" <class 'getset_descriptor | 2389 | 167.98 KB\n",
" <class 'collections.OrderedDict | 292 | 141.00 KB\n"
]
}
],
"source": [
"from pympler import muppy, summary\n",
"\n",
"# Summarize the objects reported by muppy by type and print the table.\n",
"live_objects = muppy.get_objects()\n",
"type_summary = summary.summarize(live_objects)\n",
"summary.print_(type_summary)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment