Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save mprostock/2850f3cd465155689052f0fa3a177a50 to your computer and use it in GitHub Desktop.
Save mprostock/2850f3cd465155689052f0fa3a177a50 to your computer and use it in GitHub Desktop.
"Memory Leak" copy-on-access problem in pytorch dataloaders
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dataloader \"memory leak\" problems (that are not actually a memory leak!) #13246\n",
"Notebook for demonstration purposes of bug https://github.com/pytorch/pytorch/issues/13246\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from torch.utils.data import Dataset, DataLoader\n",
"import numpy as np\n",
"import torch\n",
"import psutil\n",
"import uuid\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set up Dataset\n",
"Comment out either the `data_np` line with dtype `object` or the one with dtype `np.string_` to see the difference.\n",
"\n",
"The [reproduction example](https://github.com/pytorch/pytorch/issues/13246#issuecomment-436632186) by bfreskura in the bug thread showed the difference between a regular python list and a numpy array.\n",
"This notebook aims to show that the problem is not (only) the python list itself; the same happens with a numpy array of dtype object.\n",
"Python lists store only references to the objects; the objects themselves are kept separately in memory. Every\n",
"object has a refcount, and therefore every item in the list has a refcount.\n",
"\n",
"Numpy arrays (of standard np types) are stored as contiguous blocks in memory and are only ONE object with one\n",
"refcount. \n",
"\n",
"This changes if you make the numpy array explicitly of type object, which makes it start behaving like a regular\n",
"python list (only storing references to (string) objects). The same \"problems\" with memory consumption now appear.\n",
"\n",
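"This would explain why, with regular lists (or numpy arrays of type object), we see the \"memory leak\", which actually\n",
"is the copy-on-access problem of forked python processes due to changing refcounts, not a memory leak.\n",
"Merely reading an element changes its refcount, i.e. writes to the memory page holding that object, so each forked worker gradually gets its own copy of pages that were initially shared with the parent process.\n",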
"\n",
"So the problem probably (often) has nothing to do with tensors or actual torch objects, but rather with the lists of filenames and dicts of labels that are generally used within dataloaders/datasets.\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Size in Mem: 76.2939453125\n",
"Datatype of nparray: object\n",
"MemFlags: C_CONTIGUOUS : True\n",
" F_CONTIGUOUS : True\n",
" OWNDATA : True\n",
" WRITEABLE : True\n",
" ALIGNED : True\n",
" WRITEBACKIFCOPY : False\n",
" UPDATEIFCOPY : False\n",
"Size of item: 8\n",
"np strides: (8,)\n"
]
}
],
"source": [
"class DataIter(Dataset):\n",
" def __init__(self):\n",
" \n",
" self.data_np = np.array([str(uuid.uuid4()) for i in range(10000000)], dtype=object)\n",
" # self.data_np = np.array([str(uuid.uuid4()) for i in range(10000000)], dtype=np.string_)\n",
" \n",
" print('Size in Mem:', self.data_np.nbytes/1024**2)\n",
" print('Datatype of nparray:',self.data_np.dtype)\n",
" print('MemFlags:', self.data_np.flags)\n",
" print('Size of item:', self.data_np.itemsize)\n",
" #print('Mem adress':, self.data_np.data)\n",
" print('np strides:', self.data_np.strides) \n",
" \n",
" def __len__(self):\n",
" return len(self.data_np)\n",
"\n",
" def __getitem__(self, idx):\n",
" data = self.data_np[idx]\n",
" return 42\n",
"\n",
"\n",
"train_data = DataIter()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0 - 15.2 - 11.35 - 13.17 - 1.93\n",
" 1000 - 16.5 - 11.15 - 12.97 - 2.13\n",
" 2000 - 17.6 - 10.97 - 12.80 - 2.30\n",
" 3000 - 18.8 - 10.79 - 12.62 - 2.48\n",
" 4000 - 19.9 - 10.62 - 12.45 - 2.65\n",
" 5000 - 21.0 - 10.45 - 12.27 - 2.83\n",
" 6000 - 22.0 - 10.29 - 12.11 - 2.99\n",
" 7000 - 23.1 - 10.13 - 11.96 - 3.15\n",
" 8000 - 24.1 - 9.98 - 11.80 - 3.30\n",
" 9000 - 25.0 - 9.82 - 11.65 - 3.45\n",
" 10000 - 26.0 - 9.68 - 11.50 - 3.60\n",
" 11000 - 26.9 - 9.54 - 11.36 - 3.74\n",
" 12000 - 27.8 - 9.40 - 11.23 - 3.88\n",
" 13000 - 28.6 - 9.27 - 11.09 - 4.01\n",
" 14000 - 29.5 - 9.13 - 10.95 - 4.15\n",
" 15000 - 30.3 - 9.01 - 10.83 - 4.27\n",
" 16000 - 31.1 - 8.88 - 10.71 - 4.39\n",
" 17000 - 31.8 - 8.77 - 10.59 - 4.51\n",
" 18000 - 32.6 - 8.64 - 10.47 - 4.63\n",
" 19000 - 33.3 - 8.54 - 10.36 - 4.74\n",
" 20000 - 34.0 - 8.43 - 10.25 - 4.85\n",
" 21000 - 34.7 - 8.32 - 10.15 - 4.95\n",
" 22000 - 35.4 - 8.21 - 10.04 - 5.06\n",
" 23000 - 36.0 - 8.12 - 9.94 - 5.16\n",
" 24000 - 36.6 - 8.02 - 9.84 - 5.26\n",
" 25000 - 37.3 - 7.92 - 9.75 - 5.35\n",
" 26000 - 37.9 - 7.83 - 9.65 - 5.45\n",
" 27000 - 38.4 - 7.74 - 9.57 - 5.53\n",
" 28000 - 39.0 - 7.65 - 9.48 - 5.62\n",
" 29000 - 39.5 - 7.57 - 9.40 - 5.71\n",
" 30000 - 40.1 - 7.49 - 9.31 - 5.79\n",
" 31000 - 40.6 - 7.40 - 9.22 - 5.88\n",
" 32000 - 41.1 - 7.33 - 9.15 - 5.95\n",
" 33000 - 41.6 - 7.25 - 9.08 - 6.02\n",
" 34000 - 42.0 - 7.18 - 9.01 - 6.10\n",
" 35000 - 42.5 - 7.10 - 8.93 - 6.17\n",
" 36000 - 43.0 - 7.04 - 8.86 - 6.24\n",
" 37000 - 43.4 - 6.97 - 8.80 - 6.31\n",
" 38000 - 43.8 - 6.91 - 8.73 - 6.37\n",
" 39000 - 44.2 - 6.84 - 8.67 - 6.43\n",
" 40000 - 44.6 - 6.78 - 8.61 - 6.50\n",
" 41000 - 45.0 - 6.72 - 8.55 - 6.56\n",
" 42000 - 45.4 - 6.66 - 8.49 - 6.61\n",
" 43000 - 45.7 - 6.60 - 8.43 - 6.67\n",
" 44000 - 46.1 - 6.54 - 8.37 - 6.73\n",
" 45000 - 46.4 - 6.50 - 8.32 - 6.78\n",
" 46000 - 46.8 - 6.44 - 8.27 - 6.83\n",
" 47000 - 47.1 - 6.39 - 8.22 - 6.88\n",
" 48000 - 47.4 - 6.34 - 8.17 - 6.93\n",
" 49000 - 47.7 - 6.30 - 8.12 - 6.98\n",
" 50000 - 48.0 - 6.25 - 8.08 - 7.02\n",
" 51000 - 48.3 - 6.21 - 8.03 - 7.07\n",
" 52000 - 48.6 - 6.16 - 7.99 - 7.11\n",
" 53000 - 48.9 - 6.11 - 7.94 - 7.16\n",
" 54000 - 49.1 - 6.08 - 7.90 - 7.20\n",
" 55000 - 49.4 - 6.04 - 7.87 - 7.24\n",
" 56000 - 49.6 - 6.00 - 7.83 - 7.28\n",
" 57000 - 49.9 - 5.96 - 7.79 - 7.31\n",
" 58000 - 50.1 - 5.92 - 7.75 - 7.35\n",
" 59000 - 50.4 - 5.89 - 7.71 - 7.39\n",
" 60000 - 50.6 - 5.85 - 7.68 - 7.42\n",
" 61000 - 50.8 - 5.82 - 7.65 - 7.46\n",
" 62000 - 51.0 - 5.78 - 7.61 - 7.50\n",
" 63000 - 51.2 - 5.76 - 7.58 - 7.52\n",
" 64000 - 51.4 - 5.72 - 7.55 - 7.55\n",
" 65000 - 51.6 - 5.69 - 7.52 - 7.58\n",
" 66000 - 51.8 - 5.67 - 7.49 - 7.61\n",
" 67000 - 52.0 - 5.64 - 7.46 - 7.64\n",
" 68000 - 52.1 - 5.61 - 7.44 - 7.67\n",
" 69000 - 52.3 - 5.58 - 7.41 - 7.69\n",
" 70000 - 52.5 - 5.56 - 7.38 - 7.72\n",
" 71000 - 52.7 - 5.52 - 7.35 - 7.75\n",
" 72000 - 52.8 - 5.51 - 7.33 - 7.77\n",
" 73000 - 53.0 - 5.48 - 7.31 - 7.79\n",
" 74000 - 53.1 - 5.46 - 7.29 - 7.81\n",
" 75000 - 53.2 - 5.44 - 7.27 - 7.84\n",
" 76000 - 53.4 - 5.42 - 7.24 - 7.86\n",
" 77000 - 53.5 - 5.40 - 7.22 - 7.88\n",
" 78000 - 53.7 - 5.37 - 7.20 - 7.90\n",
" 79000 - 53.8 - 5.35 - 7.18 - 7.92\n",
" 80000 - 54.0 - 5.33 - 7.15 - 7.95\n",
" 81000 - 54.0 - 5.32 - 7.14 - 7.96\n",
" 82000 - 54.1 - 5.30 - 7.12 - 7.98\n",
" 83000 - 54.3 - 5.28 - 7.11 - 7.99\n",
" 84000 - 54.4 - 5.26 - 7.09 - 8.01\n",
" 85000 - 54.5 - 5.25 - 7.07 - 8.03\n",
" 86000 - 54.6 - 5.23 - 7.06 - 8.04\n",
" 87000 - 54.7 - 5.22 - 7.04 - 8.06\n",
" 88000 - 54.8 - 5.20 - 7.03 - 8.07\n",
" 89000 - 54.9 - 5.18 - 7.01 - 8.10\n",
" 90000 - 55.0 - 5.17 - 7.00 - 8.10\n",
" 91000 - 55.1 - 5.16 - 6.98 - 8.12\n",
" 92000 - 55.1 - 5.14 - 6.97 - 8.13\n",
" 93000 - 55.2 - 5.13 - 6.96 - 8.14\n",
" 94000 - 55.3 - 5.12 - 6.94 - 8.16\n",
" 95000 - 55.4 - 5.11 - 6.93 - 8.17\n",
" 96000 - 55.5 - 5.09 - 6.92 - 8.18\n",
" 97000 - 55.5 - 5.08 - 6.91 - 8.19\n",
" 98000 - 55.7 - 5.06 - 6.89 - 8.21\n",
" 99000 - 55.7 - 5.06 - 6.89 - 8.21\n",
" 100000 - 55.7 - 5.05 - 6.88 - 8.22\n",
" 101000 - 55.8 - 5.04 - 6.87 - 8.24\n",
" 102000 - 55.9 - 5.03 - 6.86 - 8.24\n",
" 103000 - 55.9 - 5.02 - 6.85 - 8.26\n",
" 104000 - 56.0 - 5.01 - 6.84 - 8.26\n",
" 105000 - 56.1 - 5.00 - 6.83 - 8.27\n",
" 106000 - 56.1 - 4.99 - 6.82 - 8.28\n",
" 107000 - 56.2 - 4.98 - 6.81 - 8.30\n",
" 108000 - 56.2 - 4.98 - 6.80 - 8.30\n",
" 109000 - 56.3 - 4.97 - 6.80 - 8.31\n",
" 110000 - 56.3 - 4.96 - 6.79 - 8.31\n",
" 111000 - 56.4 - 4.95 - 6.78 - 8.32\n",
" 112000 - 56.5 - 4.94 - 6.76 - 8.34\n",
" 113000 - 56.5 - 4.94 - 6.76 - 8.34\n",
" 114000 - 56.5 - 4.93 - 6.76 - 8.34\n",
" 115000 - 56.6 - 4.92 - 6.75 - 8.35\n",
" 116000 - 56.6 - 4.91 - 6.74 - 8.36\n",
" 117000 - 56.6 - 4.91 - 6.74 - 8.36\n",
" 118000 - 56.7 - 4.91 - 6.73 - 8.37\n",
" 119000 - 56.7 - 4.90 - 6.73 - 8.38\n",
" 120000 - 56.8 - 4.89 - 6.72 - 8.38\n",
" 121000 - 56.8 - 4.88 - 6.71 - 8.39\n",
" 122000 - 56.8 - 4.88 - 6.71 - 8.39\n",
" 123000 - 56.9 - 4.88 - 6.70 - 8.40\n",
" 124000 - 56.9 - 4.87 - 6.70 - 8.40\n",
" 125000 - 56.9 - 4.87 - 6.69 - 8.41\n",
" 126000 - 57.0 - 4.86 - 6.69 - 8.41\n",
" 127000 - 57.0 - 4.86 - 6.68 - 8.42\n",
" 128000 - 57.0 - 4.85 - 6.68 - 8.42\n",
" 129000 - 57.0 - 4.85 - 6.67 - 8.43\n",
" 130000 - 57.1 - 4.84 - 6.66 - 8.44\n",
" 131000 - 57.1 - 4.84 - 6.67 - 8.44\n",
" 132000 - 57.1 - 4.84 - 6.66 - 8.44\n",
" 133000 - 57.1 - 4.83 - 6.66 - 8.44\n",
" 134000 - 57.2 - 4.83 - 6.66 - 8.45\n",
" 135000 - 57.2 - 4.82 - 6.64 - 8.46\n",
" 136000 - 57.2 - 4.82 - 6.65 - 8.45\n",
" 137000 - 57.2 - 4.82 - 6.64 - 8.46\n",
" 138000 - 57.3 - 4.81 - 6.64 - 8.46\n",
" 139000 - 57.3 - 4.80 - 6.63 - 8.47\n",
" 140000 - 57.3 - 4.81 - 6.63 - 8.47\n",
" 141000 - 57.3 - 4.80 - 6.63 - 8.47\n",
" 142000 - 57.3 - 4.80 - 6.63 - 8.47\n",
" 143000 - 57.4 - 4.80 - 6.63 - 8.48\n",
" 144000 - 57.4 - 4.79 - 6.62 - 8.49\n",
" 145000 - 57.4 - 4.79 - 6.62 - 8.48\n",
" 146000 - 57.4 - 4.79 - 6.62 - 8.48\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" 147000 - 57.4 - 4.79 - 6.61 - 8.49\n",
" 148000 - 57.4 - 4.79 - 6.61 - 8.49\n",
" 149000 - 57.5 - 4.77 - 6.60 - 8.50\n",
" 150000 - 57.5 - 4.78 - 6.61 - 8.49\n",
" 151000 - 57.5 - 4.78 - 6.61 - 8.50\n",
" 152000 - 57.5 - 4.78 - 6.60 - 8.50\n",
" 153000 - 57.6 - 4.77 - 6.59 - 8.51\n",
" 154000 - 57.5 - 4.77 - 6.60 - 8.50\n",
" 155000 - 57.5 - 4.77 - 6.60 - 8.50\n",
" 156000 - 57.6 - 4.77 - 6.59 - 8.51\n",
" 157000 - 57.6 - 4.77 - 6.59 - 8.51\n",
" 158000 - 57.6 - 4.76 - 6.58 - 8.52\n",
" 159000 - 57.6 - 4.76 - 6.59 - 8.51\n",
" 160000 - 57.6 - 4.76 - 6.59 - 8.51\n",
" 161000 - 57.6 - 4.76 - 6.59 - 8.52\n",
" 162000 - 57.7 - 4.75 - 6.58 - 8.52\n",
" 163000 - 57.6 - 4.76 - 6.58 - 8.52\n",
" 164000 - 57.6 - 4.75 - 6.58 - 8.52\n",
" 165000 - 57.7 - 4.75 - 6.58 - 8.52\n",
" 166000 - 57.7 - 4.75 - 6.58 - 8.52\n",
" 167000 - 57.7 - 4.74 - 6.57 - 8.53\n",
" 168000 - 57.7 - 4.75 - 6.58 - 8.53\n",
" 169000 - 57.7 - 4.75 - 6.57 - 8.53\n",
" 170000 - 57.7 - 4.75 - 6.57 - 8.53\n",
" 171000 - 57.7 - 4.75 - 6.57 - 8.53\n",
" 172000 - 57.7 - 4.74 - 6.57 - 8.53\n",
" 173000 - 57.7 - 4.74 - 6.57 - 8.53\n",
" 174000 - 57.7 - 4.74 - 6.57 - 8.53\n",
" 175000 - 57.7 - 4.74 - 6.57 - 8.53\n",
" 176000 - 57.8 - 4.73 - 6.56 - 8.54\n",
" 177000 - 57.7 - 4.74 - 6.57 - 8.54\n",
" 178000 - 57.8 - 4.74 - 6.56 - 8.54\n",
" 179000 - 57.8 - 4.74 - 6.56 - 8.54\n",
" 180000 - 57.8 - 4.74 - 6.56 - 8.54\n",
" 181000 - 57.8 - 4.73 - 6.55 - 8.55\n",
" 182000 - 57.8 - 4.73 - 6.56 - 8.54\n",
" 183000 - 57.8 - 4.73 - 6.56 - 8.54\n",
" 184000 - 57.8 - 4.73 - 6.56 - 8.54\n",
" 185000 - 57.8 - 4.73 - 6.55 - 8.55\n",
" 186000 - 57.8 - 4.73 - 6.56 - 8.54\n",
" 187000 - 57.8 - 4.73 - 6.56 - 8.54\n",
" 188000 - 57.8 - 4.73 - 6.56 - 8.54\n",
" 189000 - 57.8 - 4.73 - 6.56 - 8.55\n",
" 190000 - 57.9 - 4.72 - 6.55 - 8.55\n",
" 191000 - 57.8 - 4.73 - 6.55 - 8.55\n",
" 192000 - 57.8 - 4.73 - 6.55 - 8.55\n",
" 193000 - 57.8 - 4.73 - 6.55 - 8.55\n",
" 194000 - 57.8 - 4.73 - 6.55 - 8.55\n",
" 195000 - 57.8 - 4.72 - 6.55 - 8.55\n",
" 196000 - 57.8 - 4.73 - 6.55 - 8.55\n",
" 197000 - 57.8 - 4.72 - 6.55 - 8.55\n",
" 198000 - 57.8 - 4.73 - 6.55 - 8.55\n",
" 199000 - 57.9 - 4.72 - 6.54 - 8.56\n"
]
}
],
"source": [
"mem_used=[]\n",
"mem_used.append(psutil.virtual_memory().used/1024**3)\n",
"train_loader = DataLoader(train_data, batch_size=50,\n",
" shuffle=True,\n",
" drop_last=True,\n",
" pin_memory=False,\n",
" num_workers=8)\n",
"\n",
"for i, item in enumerate(train_loader):\n",
" if i % 1000 == 0:\n",
" mem = psutil.virtual_memory()\n",
" print(f'{i:8} - {mem.percent:5} - {mem.free/1024**3:10.2f} - {mem.available/1024**3:10.2f} - {mem.used/1024**3:10.2f}')\n",
" mem_used.append(mem.used/1024**3)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7f2c0089e160>]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# mem use with fixed length array of type string_\n",
"plt.plot(np.array(mem_used))\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7f57080b7940>]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# plot with array of type object\n",
"plt.plot(np.array(mem_used))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment