Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save danielpcox/b5b533c5134d26e1299e88690e432627 to your computer and use it in GitHub Desktop.
Save danielpcox/b5b533c5134d26e1299e88690e432627 to your computer and use it in GitHub Desktop.
scratch/memory_leak_train_nonproprietary.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true,
"ExecuteTime": {
"start_time": "2020-07-28T20:12:11.710866Z",
"end_time": "2020-07-28T20:12:12.366484Z"
}
},
"cell_type": "code",
"source": "import torch\nimport torch.nn as nn\nfrom torch.utils.data import Dataset, DataLoader\nimport torch.optim as optim\nfrom torch import tensor\nimport string\nimport random\n\nfrom concurrent.futures import ThreadPoolExecutor\nimport gc\nimport os\nprint(os.getpid())",
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": "24013\n",
"name": "stdout"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Setup"
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:12.369421Z",
"end_time": "2020-07-28T20:12:12.373751Z"
},
"trusted": true
},
"cell_type": "code",
"source": "batch_size = 16\ndevice = torch.device('cpu')\nlearning_rate = 3e-3\ngrad_clip = 1.0\nepochs = 2",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:12.377434Z",
"end_time": "2020-07-28T20:12:12.380906Z"
},
"trusted": true
},
"cell_type": "code",
"source": "SOS = chr(2)\nEOS = chr(3)",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:12.383661Z",
"end_time": "2020-07-28T20:12:12.397211Z"
},
"trusted": true
},
"cell_type": "code",
"source": "class OnehotDataset(Dataset):\n def __init__(self, lines, device):\n super().__init__()\n \n self.device = device\n \n alphabet = list(string.printable)+[SOS,EOS]\n self.alphabet_len = len(alphabet)\n self.char2index = {c:i for i,c in enumerate(alphabet)}\n \n # lines reworked into sequences of ordinals wrt my alphabet\n self.lines = torch.nn.utils.rnn.pad_sequence([tensor([[self.char2index[c]] for c in line], dtype=torch.long, device=self.device)\n for line in lines], batch_first=True)\n self.lengths = [len(line) for line in lines]\n self.max_seq_len = max(len(line) for line in lines)\n \n def indices2onehots(self, inds):\n onehots = nn.functional.one_hot(inds, num_classes=self.alphabet_len)\n return onehots.squeeze(len(inds.shape)-1).float()\n \n def __getitem__(self, i):\n onehots = self.indices2onehots(self.lines[i])\n lengths = self.lengths[i]\n return onehots, lengths\n \n def __len__(self):\n return len(self.lines)",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:12.399900Z",
"end_time": "2020-07-28T20:12:12.415350Z"
},
"trusted": true
},
"cell_type": "code",
"source": "lines = [\n 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor',\n 'incididunt ut labore et dolore magna aliqua. Turpis egestas integer eget aliquet nibh praesent.',\n 'Purus in massa tempor nec feugiat nisl.',\n 'Suspendisse interdum consectetur libero id faucibus nisl tincidunt eget nullam. Aenean vel elit scelerisque mauris pellentesque.',\n 'Venenatis cras sed felis eget velit. Ornare quam viverra orci sagittis eu volutpat odio facilisis.',\n 'Elementum curabitur vitae nunc sed velit. Eu ultrices vitae auctor eu augue ut lectus arcu.',\n 'Sodales ut etiam sit amet nisl purus in. Mi quis hendrerit dolor magna.',\n 'A erat nam at lectus urna duis convallis convallis.',\n 'Ornare arcu dui vivamus arcu felis.',\n 'Sed felis eget velit aliquet sagittis id consectetur purus.',\n 'Nulla at volutpat diam ut venenatis tellus. Accumsan sit amet nulla facilisi morbi.'\n] * 182\n\nlen(lines)",
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": "2002"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:12.417766Z",
"end_time": "2020-07-28T20:12:12.514765Z"
},
"trusted": true
},
"cell_type": "code",
"source": "dataset = OnehotDataset(lines, device)",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:12.516734Z",
"end_time": "2020-07-28T20:12:12.519933Z"
},
"trusted": true
},
"cell_type": "code",
"source": "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=False)",
"execution_count": 7,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Models"
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:12.522065Z",
"end_time": "2020-07-28T20:12:12.527533Z"
},
"trusted": true
},
"cell_type": "code",
"source": "class Encoder(nn.Module):\n def __init__(self, input_dim, hidden_dim, n_layers, dropout_perc):\n super().__init__()\n self.hidden_dim, self.n_layers = (hidden_dim, n_layers)\n \n self.rnn = nn.LSTM(input_dim,hidden_dim,n_layers,dropout=dropout_perc)\n \n def forward(self,x):\n outputs, (hidden, cell) = self.rnn(x)\n return hidden, cell",
"execution_count": 8,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:12.531271Z",
"end_time": "2020-07-28T20:12:12.537899Z"
},
"trusted": true
},
"cell_type": "code",
"source": "class Decoder(nn.Module):\n def __init__(self, output_dim, hidden_dim, n_layers, dropout_perc):\n super().__init__()\n self.hidden_dim, self.n_layers = (hidden_dim, n_layers)\n \n self.rnn = nn.LSTM(output_dim, hidden_dim, n_layers, dropout=dropout_perc)\n \n self.fc = nn.Linear(hidden_dim, output_dim)\n \n def forward(self, x, hidden, cell):\n output, (hidden, cell) = self.rnn(x, (hidden, cell))\n fc = self.fc(output)\n prediction = torch.nn.functional.softmax(fc, dim=2)\n return prediction, hidden, cell",
"execution_count": 9,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:12.540743Z",
"end_time": "2020-07-28T20:12:12.548864Z"
},
"trusted": true
},
"cell_type": "code",
"source": "class Autoencoder(nn.Module):\n def __init__(self, encoder, decoder, device):\n super().__init__()\n self.encoder, self.decoder = (encoder, decoder)\n \n def forward(self, x, unpacked=None, teacher_forcing_ratio=0.5):\n if unpacked is None: unpacked = x\n predictions = torch.zeros_like(unpacked).to(device)\n \n # Dump whole input sequence into encoder at once\n hidden, cell = self.encoder(x)\n \n# predictions, _, _ = self.decoder(unpacked, hidden, cell)\n \n # Start decoder with the first character of the true input (which is the \"start of string\" character)\n char = unpacked[None,0]\n predictions[0] = char\n \n sequence_length = unpacked.shape[0]\n for i in range(1,sequence_length):\n # char shape [seq, batch, alphabet]\n prediction, hidden, cell = self.decoder(char, hidden, cell)\n\n predictions[i] = prediction\n \n indices = prediction.argmax(2).unsqueeze(2)\n char = dataset.indices2onehots(indices)\n\n return predictions",
"execution_count": 10,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Train"
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:12.550947Z",
"end_time": "2020-07-28T20:12:12.556901Z"
},
"trusted": true
},
"cell_type": "code",
"source": "def setup():\n LEN_ALPHABET = dataset.alphabet_len\n HID_DIM = 64\n N_LAYERS = 4 # encoder n_layers must match decoder n_layers\n \n t_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=False)\n\n enc = Encoder(input_dim=LEN_ALPHABET, hidden_dim=HID_DIM, n_layers=N_LAYERS, dropout_perc=0.5)\n dec = Decoder(output_dim=LEN_ALPHABET, hidden_dim=HID_DIM, n_layers=N_LAYERS, dropout_perc=0.5)\n model = Autoencoder(enc, dec, device).to(device)\n \n optimizer = optim.Adam(model.parameters(), lr=learning_rate)\n\n return model, optimizer, t_dataloader",
"execution_count": 11,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:12.558976Z",
"end_time": "2020-07-28T20:12:12.566329Z"
},
"trusted": true
},
"cell_type": "code",
"source": "def train(model, optimizer, dataloader, epochs, job=0):\n print(f'JOB {job} has begun training')\n criterion = nn.BCELoss()\n model.train()\n for epoch in range(epochs):\n for i, (batch, lengths) in enumerate(dataloader):\n batch = batch.permute(1,0,2)\n packed_batch = torch.nn.utils.rnn.pack_padded_sequence(batch, lengths, enforce_sorted=False)\n model.zero_grad()\n output = model(packed_batch, unpacked=batch)\n train_loss = criterion(output, batch)\n train_loss.backward()\n torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)\n optimizer.step()\n print(f'JOB {job}: epoch:{epoch}. Last loss:{train_loss.item()}')\n gc.collect()",
"execution_count": 12,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:12.568411Z",
"end_time": "2020-07-28T20:12:12.571868Z"
},
"trusted": true
},
"cell_type": "code",
"source": "print(os.getpid())",
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"text": "24013\n",
"name": "stdout"
}
]
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:11.731Z"
},
"trusted": true
},
"cell_type": "code",
"source": "executor = ThreadPoolExecutor(max_workers=3)\nfutures = []\nfor job in range(6):\n t_model, t_optimizer, t_dataloader = setup()\n futures.append( executor.submit(train, t_model, t_optimizer, t_dataloader, epochs, job) )\n\nfor f in futures:\n f.result()",
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": "JOB 0 has begun training\nJOB 1 has begun training\nJOB 2 has begun training\nJOB 0: epoch:0. Last loss:0.02211318351328373\nJOB 1: epoch:0. Last loss:0.009625717997550964\nJOB 2: epoch:0. Last loss:0.025440813973546028\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "del futures\ndel t_model\ndel t_optimizer\ndel t_dataloader\ndel dataloader\ndel dataset\ndel executor",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"start_time": "2020-07-28T20:12:11.733Z"
},
"trusted": true
},
"cell_type": "code",
"source": "import ctypes\nimport ctypes.util\nlibc = ctypes.CDLL(ctypes.util.find_library('c'))\nlibc.malloc_trim(ctypes.c_int(0))",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"name": "lad",
"display_name": "lad",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.8.3",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"varInspector": {
"window_display": false,
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"library": "var_list.py",
"delete_cmd_prefix": "del ",
"delete_cmd_postfix": "",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"library": "var_list.r",
"delete_cmd_prefix": "rm(",
"delete_cmd_postfix": ") ",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
]
},
"gist": {
"id": "",
"data": {
"description": "scratch/memory_leak_train_nonproprietary.ipynb",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment