Skip to content

Instantly share code, notes, and snippets.

@kylemcdonald
Last active June 12, 2020 21:34
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save kylemcdonald/2d06dc736789f0b329e11d504e8dee9f to your computer and use it in GitHub Desktop.
Save kylemcdonald/2d06dc736789f0b329e11d504e8dee9f to your computer and use it in GitHub Desktop.
Recreating char-rnn from the spro/practical-pytorch tutorials.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import random\n",
"import time\n",
"import math\n",
"from IPython.display import clear_output\n",
"import matplotlib.pyplot as plt\n",
"\n",
"use_cuda = torch.cuda.is_available()\n",
"use_cuda"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"seq_length = 50\n",
"batch_size = 50\n",
"hidden_size = 128\n",
"epoch_count = 10\n",
"n_layers = 2\n",
"lr = 2e-3\n",
"input_filename = 'tiny-shakespeare.txt'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"text = open(input_filename).read() #.lower()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"chars = set(text)\n",
"chars_len = len(chars)\n",
"char_to_index = {}\n",
"index_to_char = {}\n",
"for i, c in enumerate(chars):\n",
" char_to_index[c] = i\n",
" index_to_char[i] = c"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[range(0, 3), range(3, 6), range(6, 9)]\n"
]
}
],
"source": [
"def time_since(since):\n",
" s = time.time() - since\n",
" m = math.floor(s / 60)\n",
" s -= m * 60\n",
" return '%dm %ds' % (m, s)\n",
"\n",
"def chunks(l, n):\n",
" for i in range(0, len(l) - n + 1, n):\n",
" yield l[i:i + n]\n",
"\n",
"print(list(chunks(range(11), 3)))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([51, 50])\n"
]
}
],
"source": [
"# convert all characters to indices\n",
"batches = [char_to_index[char] for char in text]\n",
"\n",
"# chunk into sequences of length seq_length + 1\n",
"batches = list(chunks(batches, seq_length + 1))\n",
"\n",
"# chunk sequences into batches\n",
"batches = list(chunks(batches, batch_size))\n",
"\n",
"# convert batches to tensors and transpose\n",
"batches = [torch.LongTensor(batch).transpose_(0,1) for batch in batches]\n",
"\n",
"# each batch is (seq_length + 1) x batch_size\n",
"print(batches[0].size())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"class RNN(nn.Module):\n",
" def __init__(self, input_size, hidden_size, output_size, n_layers, batch_size):\n",
" super(RNN, self).__init__()\n",
" self.input_size = input_size\n",
" self.hidden_size = hidden_size\n",
" self.output_size = output_size\n",
" self.n_layers = n_layers\n",
" self.batch_size = batch_size\n",
" \n",
" self.encoder = nn.Embedding(input_size, hidden_size)\n",
" self.cells = nn.GRU(hidden_size, hidden_size, n_layers)\n",
" self.decoder = nn.Linear(hidden_size, output_size)\n",
" \n",
" def forward(self, input, hidden):\n",
" input = self.encoder(input)\n",
" output, hidden = self.cells(input, hidden)\n",
" output = self.decoder(output.view(output.size(0) * output.size(1), output.size(2)))\n",
" return output, hidden\n",
" \n",
" def create_hidden(self):\n",
" # should this be small random instead of zeros\n",
" # should this also be stored in the class rather than being passed around?\n",
" return torch.zeros(self.n_layers, self.batch_size, self.hidden_size)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Duration: 0m 31s\n",
"Epoch: 10/10\n",
"Batch: 435/437, 139.46/s\n",
"Loss: 1.42\n",
"\n"
]
}
],
"source": [
"print_every = 5\n",
"\n",
"model = RNN(chars_len, hidden_size, chars_len, n_layers, batch_size)\n",
"optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n",
"loss_function = nn.CrossEntropyLoss()\n",
"hidden = model.create_hidden()\n",
"\n",
"if use_cuda:\n",
" model = model.cuda()\n",
" hidden = hidden.cuda()\n",
"\n",
"start = time.time()\n",
"all_losses = []\n",
"\n",
"format_string = \\\n",
"\"\"\"\n",
"Duration: {duration}\n",
"Epoch: {epoch}/{epoch_count}\n",
"Batch: {batch}/{batch_count}, {batch_rate:.2f}/s\n",
"Loss: {loss:.2f}\n",
"\"\"\"\n",
"\n",
"try:\n",
" for epoch in range(1, epoch_count + 1):\n",
" random.shuffle(batches)\n",
" for batch, batch_tensor in enumerate(batches):\n",
" if use_cuda:\n",
" batch_tensor = batch_tensor.cuda()\n",
" \n",
" # reset the model\n",
" model.zero_grad()\n",
" \n",
" # everything except the last\n",
" input_variable = batch_tensor[:-1]\n",
" \n",
" # everything except the first, flattened\n",
" target_variable = batch_tensor[1:].view(-1)\n",
" \n",
" # prediction and calculate loss\n",
" output, _ = model(input_variable, hidden)\n",
" loss = loss_function(output, target_variable)\n",
"\n",
" # backprop and optimize\n",
" loss.backward()\n",
" optimizer.step()\n",
" \n",
" all_losses.append(loss.item())\n",
"\n",
" if print_every > 0 and batch % print_every == 0:\n",
" clear_output(wait=True)\n",
" batch_count = len(batches)\n",
" batch_rate = ((batch_count * (epoch - 1)) + batch) / (time.time() - start)\n",
" print(format_string.format(duration=time_since(start),\n",
" epoch=epoch,\n",
" epoch_count=epoch_count,\n",
" batch=batch,\n",
" batch_count=batch_count,\n",
" batch_rate=batch_rate,\n",
" loss=loss))\n",
" \n",
"except KeyboardInterrupt:\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"image/png": {
"height": 248,
"width": 372
},
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.plot(all_losses)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def index_to_tensor(index):\n",
" tensor = torch.zeros(1, 1).long()\n",
" tensor[0,0] = index\n",
" return tensor\n",
"\n",
"# print(index_to_tensor(10))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"def evaluate(prime_str='A', predict_len=100, temperature=0.8):\n",
" model.batch_size = 1\n",
" hidden = model.create_hidden()\n",
" \n",
" if use_cuda:\n",
" hidden = hidden.cuda()\n",
" \n",
" prime_tensors = [index_to_tensor(char_to_index[char]) for char in prime_str]\n",
" \n",
" if use_cuda:\n",
" prime_tensors = [tensor.cuda() for tensor in prime_tensors]\n",
"\n",
" for prime_tensor in prime_tensors[:-1]:\n",
" _, hidden = model(prime_tensor, hidden)\n",
" \n",
" inp = prime_tensors[-1]\n",
" predicted = prime_str\n",
" for p in range(predict_len):\n",
" if use_cuda:\n",
" inp = inp.cuda()\n",
" \n",
" output, hidden = model(inp, hidden)\n",
" \n",
" # Sample from the network as a multinomial distribution\n",
" output_dist = output.data.view(-1).div(temperature).exp()\n",
" top_i = torch.multinomial(output_dist, 1)[0]\n",
" \n",
" # Add predicted character to string and use as next input\n",
" predicted_char = index_to_char[top_i.item()]\n",
" predicted += predicted_char\n",
" inp = index_to_tensor(char_to_index[predicted_char])\n",
"\n",
" return predicted"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Thave numberland and justice of virtue;\n",
"Which on them spiteof and forsworn,\n",
"That sorrow to the news in against their eyes, and it not like proved\n",
"That all the revenge: would so long to this lips be this brother,\n",
"I one stard it.\n",
"\n",
"BUCKINGHAM:\n",
"It is near, or else say 'Will, sir, sir: the commonfessions leave one beauterly,\n",
"And his near to defend thine;\n",
"Sour tale his father, catched thee for the back\n",
"Upongment of live and posite the faults.\n",
"\n",
"Second Servant:\n",
"The more sworn about the fire to big and so \n"
]
}
],
"source": [
"print(evaluate('Th', 500, temperature=0.8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "torch-tf2",
"language": "python",
"name": "torch-tf2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment