Skip to content

Instantly share code, notes, and snippets.

Last active January 15, 2020 21:05
Show Gist options
  • Save gokulanv/73839706217c6f44dace8e45ba631fd8 to your computer and use it in GitHub Desktop.
Save gokulanv/73839706217c6f44dace8e45ba631fd8 to your computer and use it in GitHub Desktop.
An effort to build an RNN from scratch!
Display the source blob
Display the rendered blob
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "RNNExercise.ipynb",
"provenance": [],
"toc_visible": true,
"include_colab_link": true
"kernelspec": {
"name": "python2",
"display_name": "Python 2"
"accelerator": "GPU"
"cells": [
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
"source": [
"<a href=\"\" target=\"_parent\"><img src=\"\" alt=\"Open In Colab\"/></a>"
"cell_type": "code",
"metadata": {
"id": "p_XCXrGOprdR",
"colab_type": "code",
"colab": {}
"source": [
"import csv\n",
"import itertools\n",
"import operator\n",
"import numpy as np\n",
"import nltk\n",
"import sys\n",
"from datetime import datetime\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
"execution_count": 0,
"outputs": []
"cell_type": "code",
"metadata": {
"id": "xcWSf7dPp3B6",
"colab_type": "code",
"outputId": "452fc411-d605-49a7-a3e0-0c94fd0e2e53",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
"source": [
"execution_count": 0,
"outputs": [
"output_type": "stream",
"text": [
"[nltk_data] Downloading collection u'book'\n",
"[nltk_data] | \n",
"[nltk_data] | Downloading package abc to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package brown to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package chat80 to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package cmudict to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package conll2000 to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package conll2002 to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package dependency_treebank to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package genesis to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package gutenberg to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package ieer to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package inaugural to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package movie_reviews to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package nps_chat to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package names to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package ppattach to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package reuters to /root/nltk_data...\n",
"[nltk_data] | Downloading package senseval to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package state_union to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package stopwords to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package swadesh to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package timit to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package treebank to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package toolbox to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package udhr to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package udhr2 to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package unicode_samples to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package webtext to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package wordnet to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package wordnet_ic to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package words to /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package maxent_treebank_pos_tagger to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping taggers/\n",
"[nltk_data] | Downloading package maxent_ne_chunker to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping chunkers/\n",
"[nltk_data] | Downloading package universal_tagset to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping taggers/\n",
"[nltk_data] | Downloading package punkt to /root/nltk_data...\n",
"[nltk_data] | Unzipping tokenizers/\n",
"[nltk_data] | Downloading package book_grammars to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping grammars/\n",
"[nltk_data] | Downloading package city_database to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping corpora/\n",
"[nltk_data] | Downloading package tagsets to /root/nltk_data...\n",
"[nltk_data] | Unzipping help/\n",
"[nltk_data] | Downloading package panlex_swadesh to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Downloading package averaged_perceptron_tagger to\n",
"[nltk_data] | /root/nltk_data...\n",
"[nltk_data] | Unzipping taggers/\n",
"[nltk_data] | \n",
"[nltk_data] Done downloading collection book\n"
"name": "stdout"
"output_type": "execute_result",
"data": {
"text/plain": [
"metadata": {
"tags": []
"execution_count": 2
"cell_type": "code",
"metadata": {
"id": "Gy0rm0omp851",
"colab_type": "code",
"outputId": "6098587e-6c0a-47bf-c358-eaba26fdb356",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 188
"source": [
"vocabulary_size = 8000\n",
"unknown_token = \"UNKNOWN_TOKEN\"\n",
"sentence_start_token = \"SENTENCE_START\"\n",
"sentence_end_token = \"SENTENCE_END\"\n",
"# Read the data and append SENTENCE_START and SENTENCE_END tokens\n",
"print \"Reading CSV file...\"\n",
"with open('reddit-comments-2015-08.csv', 'rb') as f:\n",
" reader = csv.reader(f, skipinitialspace=True)\n",
" # Split full comments into sentences\n",
" sentences = itertools.chain(*[nltk.sent_tokenize(x[0].decode('utf-8').lower()) for x in reader])\n",
" sentences = [\"%s %s %s\" % (sentence_start_token, x, sentence_end_token) for x in sentences]\n",
"print \"Parsed %d sentences.\" % (len(sentences))\n",
" \n",
"# Tokenize the sentences into words\n",
"tokenized_sentences = [nltk.word_tokenize(sent) for sent in sentences]\n",
"# Count the word frequencies\n",
"word_freq = nltk.FreqDist(itertools.chain(*tokenized_sentences))\n",
"print \"Found %d unique words tokens.\" % len(word_freq.items())\n",
"# Get the most common words and build index_to_word and word_to_index vectors\n",
"vocab = word_freq.most_common(vocabulary_size-1)\n",
"index_to_word = [x[0] for x in vocab]\n",
"word_to_index = dict([(w,i) for i,w in enumerate(index_to_word)])\n",
"print \"Using vocabulary size %d.\" % vocabulary_size\n",
"print \"The least frequent word in our vocabulary is '%s' and appeared %d times.\" % (vocab[-1][0], vocab[-1][1])\n",
"# Replace all words not in our vocabulary with the unknown token\n",
"for i, sent in enumerate(tokenized_sentences):\n",
" tokenized_sentences[i] = [w if w in word_to_index else unknown_token for w in sent]\n",
"print \"\\nExample sentence: '%s'\" % sentences[0]\n",
"print \"\\nExample sentence after Pre-processing: '%s'\" % tokenized_sentences[0]"
"execution_count": 0,
"outputs": [
"output_type": "stream",
"text": [
"Reading CSV file...\n",
"Parsed 79170 sentences.\n",
"Found 65498 unique words tokens.\n",
"Using vocabulary size 8000.\n",
"The least frequent word in our vocabulary is 'traction' and appeared 10 times.\n",
"Example sentence: 'SENTENCE_START i joined a new league this year and they have different scoring rules than i'm used to. SENTENCE_END'\n",
"Example sentence after Pre-processing: '[u'SENTENCE_START', u'i', u'joined', u'a', u'new', u'league', u'this', u'year', u'and', u'they', u'have', u'different', u'scoring', u'rules', u'than', u'i', u\"'m\", u'used', u'to', u'.', u'SENTENCE_END']'\n"
"name": "stdout"
"cell_type": "code",
"metadata": {
"id": "nmmr5wrUqBOL",
"colab_type": "code",
"colab": {}
"source": [
"# Create the training data\n",
"X_train = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences])\n",
"y_train = np.asarray([[word_to_index[w] for w in sent[1:]] for sent in tokenized_sentences])"
"execution_count": 0,
"outputs": []
"cell_type": "code",
"metadata": {
"id": "fO9en4KBqnZm",
"colab_type": "code",
"outputId": "aa734323-8246-4dfb-b14d-267816266eab",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 134
"source": [
"x_example, y_example = X_train[17], y_train[17]\n",
"print \"x:\\n%s\\n%s\" % (\" \".join([index_to_word[x] for x in x_example]), x_example)\n",
"print \"\\ny:\\n%s\\n%s\" % (\" \".join([index_to_word[x] for x in y_example]), y_example)"
"execution_count": 0,
"outputs": [
"output_type": "stream",
"text": [
"SENTENCE_START what are n't you understanding about this ? !\n",
"[0, 51, 27, 16, 10, 861, 54, 25, 34, 69]\n",
"what are n't you understanding about this ? ! SENTENCE_END\n",
"[51, 27, 16, 10, 861, 54, 25, 34, 69, 1]\n"
"name": "stdout"
"cell_type": "code",
"metadata": {
"id": "e6kKpX5oAa3C",
"colab_type": "code",
"colab": {}
"source": [
"def softmax(x):\n",
" xt = np.exp(x - np.max(x))\n",
" return xt / np.sum(xt)\n",
"def save_model_parameters_theano(outfile, model):\n",
" U, V, W = model.U.get_value(), model.V.get_value(), model.W.get_value()\n",
" np.savez(outfile, U=U, V=V, W=W)\n",
" print \"Saved model parameters to %s.\" % outfile\n",
" \n",
"def load_model_parameters_theano(path, model):\n",
" npzfile = np.load(path)\n",
" U, V, W = npzfile[\"U\"], npzfile[\"V\"], npzfile[\"W\"]\n",
" model.hidden_dim = U.shape[0]\n",
" model.word_dim = U.shape[1]\n",
" model.U.set_value(U)\n",
" model.V.set_value(V)\n",
" model.W.set_value(W)\n",
" print \"Loaded model parameters from %s. hidden_dim=%d word_dim=%d\" % (path, U.shape[0], U.shape[1])"
"execution_count": 0,
"outputs": []
"cell_type": "code",
"metadata": {
"id": "LRNjEp6Jq3bA",
"colab_type": "code",
"colab": {}
"source": [
"class RNNPy:\n",
" def __init__(self, word_dim, hidden_dim = 100, bptt_truncate = 4):\n",
" self.word_dim = word_dim\n",
" self.hidden_dim = hidden_dim\n",
" self.bptt_truncate = bptt_truncate\n",
" self.U = np.random.uniform(-1/np.sqrt(word_dim), 1/np.sqrt(word_dim), (hidden_dim, word_dim))\n",
" self.V = np.random.uniform(-1/np.sqrt(hidden_dim), 1/np.sqrt(hidden_dim), (word_dim, hidden_dim))\n",
" self.W = np.random.uniform(-1/np.sqrt(hidden_dim), 1/np.sqrt(hidden_dim), (hidden_dim, hidden_dim))"
"execution_count": 0,
"outputs": []
"cell_type": "code",
"metadata": {
"id": "P8dIiXgZvTHF",
"colab_type": "code",
"colab": {}
"source": [
"def forward_prop(self, x):\n",
" # x is similar to batch_size\n",
" T = len(x)\n",
" s = np.zeros((T+1, self.hidden_dim))\n",
" o = np.zeros((T, self.word_dim))\n",
" for i in range(1, T):\n",
" s[i] = np.tanh(self.U[:,x[i]] +, s[i-1]))\n",
" o[i] = softmax(, s[i]))\n",
" return [o,s]\n",
"RNNPy.forward_prop = forward_prop\n"
"execution_count": 0,
"outputs": []
"cell_type": "code",
"metadata": {
"id": "a9TtQ6xgAzNx",
"colab_type": "code",
"colab": {}
"source": [
"def predict(self, x):\n",
" # Perform forward propagation and return index of the highest score\n",
" o, s = self.forward_prop(x)\n",
" return np.argmax(o, axis=1)\n",
"RNNPy.predict = predict"
"execution_count": 0,
"outputs": []
"cell_type": "code",
"metadata": {
"id": "UC5ehxCfa3lU",
"colab_type": "code",
"outputId": "a9f5b7ba-3408-41ef-8762-9bd4613ea802",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 151
"source": [
"model = RNNPy(vocabulary_size)\n",
"o, s = model.forward_prop(X_train[10])\n",
"print o.shape\n",
"print o"
"execution_count": 0,
"outputs": [
"output_type": "stream",
"text": [
"(45, 8000)\n",
"[[0. 0. 0. ... 0. 0. 0. ]\n",
" [0.0001254 0.00012548 0.00012455 ... 0.00012493 0.00012458 0.00012458]\n",
" [0.00012389 0.00012525 0.00012473 ... 0.00012546 0.0001259 0.00012535]\n",
" ...\n",
" [0.00012406 0.00012463 0.00012539 ... 0.00012617 0.00012463 0.00012589]\n",
" [0.00012547 0.00012431 0.00012485 ... 0.00012427 0.00012611 0.00012472]\n",
" [0.00012482 0.00012529 0.00012477 ... 0.00012488 0.00012508 0.0001267 ]]\n"
"name": "stdout"
"cell_type": "code",
"metadata": {
"id": "KCfcuK2Wa3ZB",
"colab_type": "code",
"outputId": "65c94c38-b6f5-47f4-dc03-bc571d054051",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 101
"source": [
"predictions = model.predict(X_train[10])\n",
"print predictions.shape\n",
"print predictions"
"execution_count": 0,
"outputs": [
"output_type": "stream",
"text": [
"[ 0 2048 7434 7430 1013 3562 7366 1627 2212 3251 7299 6722 565 238\n",
" 2539 21 6548 261 5274 2082 1835 5376 3522 477 7051 7352 7715 3822\n",
" 6914 5059 3850 6176 743 2082 5561 2182 6569 2800 2752 6821 4437 7021\n",
" 6399 6912 3922]\n"
"name": "stdout"
"cell_type": "code",
"metadata": {
"id": "Cmt4Ohnsa3JC",
"colab_type": "code",
"colab": {}
"source": [
"def calculate_total_loss(self, x, y):\n",
" L = 0\n",
" # For each sentence...\n",
" for i in np.arange(len(y)):\n",
" o, s = self.forward_prop(x[i])\n",
" # We only care about our prediction of the \"correct\" words\n",
" correct_word_predictions = o[np.arange(len(y[i])), y[i]]\n",
" # Add to the loss based on how off we were\n",
" temp = np.sum(np.log(correct_word_predictions))\n",
" L += -1 * temp\n",
" return L\n",
"def calculate_loss(self, x, y):\n",
" # Divide the total loss by the number of training examples\n",
" N = np.sum((len(y_i) for y_i in y))\n",
" return self.calculate_total_loss(x,y)/N\n",
"RNNPy.calculate_total_loss = calculate_total_loss\n",
"RNNPy.calculate_loss = calculate_loss"
"execution_count": 0,
"outputs": []
"cell_type": "code",
"metadata": {
"id": "f_PuasRRa22u",
"colab_type": "code",
"outputId": "84405546-1ff6-47a3-b1a8-234307dada23",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 138
"source": [
"# Limit to 1000 examples to save time\n",
"print \"Expected Loss for random predictions: %f\" % np.log(vocabulary_size)\n",
"print \"Actual loss: %f\" % model.calculate_loss(X_train[:1000], y_train[:1000])"
"execution_count": 0,
"outputs": [
"output_type": "stream",
"text": [
"Expected Loss for random predictions: 8.987197\n"
"name": "stdout"
"output_type": "stream",
"text": [
"/usr/local/lib/python2.7/dist-packages/ DeprecationWarning: Calling np.sum(generator) is deprecated, and in the future will give a different result. Use np.sum(np.fromiter(generator)) or the python sum builtin instead.\n",
" from ipykernel import kernelapp as app\n",
"/usr/local/lib/python2.7/dist-packages/ RuntimeWarning: divide by zero encountered in log\n",
" if __name__ == '__main__':\n"
"name": "stderr"
"output_type": "stream",
"text": [
"Actual loss: inf\n"
"name": "stdout"
"cell_type": "markdown",
"metadata": {
"id": "Z8YNtU7MeZIC",
"colab_type": "text"
"source": [
"#Backpropagation through time - BPTT"
"cell_type": "code",
"metadata": {
"id": "TW0YcO5N2k5d",
"colab_type": "code",
"colab": {}
"source": [
"def bptt(self, x, y):\n",
" T = len(y)\n",
" # Perform forward propagation\n",
" o, s = self.forward_prop(x)\n",
" # We accumulate the gradients in these variables\n",
" dLdU = np.zeros(self.U.shape)\n",
" dLdV = np.zeros(self.V.shape)\n",
" dLdW = np.zeros(self.W.shape)\n",
" delta_o = o\n",
" delta_o[np.arange(len(y)), y] -= 1.\n",
" # For each output backwards...\n",
" for t in np.arange(T)[::-1]:\n",
" dLdV += np.outer(delta_o[t], s[t].T)\n",
" # Initial delta calculation\n",
" delta_t =[t]) * (1 - (s[t] ** 2))\n",
" # Backpropagation through time (for at most self.bptt_truncate steps)\n",
" for bptt_step in np.arange(max(0, t-self.bptt_truncate), t+1)[::-1]:\n",
" # print \"Backpropagation step t=%d bptt step=%d \" % (t, bptt_step)\n",
" dLdW += np.outer(delta_t, s[bptt_step-1]) \n",
" dLdU[:,x[bptt_step]] += delta_t\n",
" # Update delta for next step\n",
" delta_t = * (1 - s[bptt_step-1] ** 2)\n",
" return [dLdU, dLdV, dLdW]\n",
"RNNPy.bptt = bptt"
"execution_count": 0,
"outputs": []
"cell_type": "markdown",
"metadata": {
"id": "gIQAuc5aegP3",
"colab_type": "text"
"source": [
"#Gradient Checking"
"cell_type": "code",
"metadata": {
"id": "YkChTWb3QPRM",
"colab_type": "code",
"outputId": "e683a2d3-37ab-4015-f498-2dcfb5973f09",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 168
"source": [
"def gradient_check(self, x, y, h=0.001, error_threshold=0.01):\n",
" # Calculate the gradients using backpropagation. We want to checker if these are correct.\n",
" bptt_gradients = model.bptt(x, y)\n",
" # List of all parameters we want to check.\n",
" model_parameters = ['U', 'V', 'W']\n",
" # Gradient check for each parameter\n",
" for pidx, pname in enumerate(model_parameters):\n",
" # Get the actual parameter value from the mode, e.g. model.W\n",
" parameter = operator.attrgetter(pname)(self)\n",
" print \"Performing gradient check for parameter %s with size %d.\" % (pname,\n",
" # Iterate over each element of the parameter matrix, e.g. (0,0), (0,1), ...\n",
" it = np.nditer(parameter, flags=['multi_index'], op_flags=['readwrite'])\n",
" while not it.finished:\n",
" ix = it.multi_index\n",
" # Save the original value so we can reset it later\n",
" original_value = parameter[ix]\n",
" # Estimate the gradient using (f(x+h) - f(x-h))/(2*h)\n",
" parameter[ix] = original_value + h\n",
" gradplus = model.calculate_total_loss([x],[y])\n",
" parameter[ix] = original_value - h\n",
" gradminus = model.calculate_total_loss([x],[y])\n",
" estimated_gradient = (gradplus - gradminus)/(2*h)\n",
" # Reset parameter to original value\n",
" parameter[ix] = original_value\n",
" # The gradient for this parameter calculated using backpropagation\n",
" backprop_gradient = bptt_gradients[pidx][ix]\n",
" # calculate The relative error: (|x - y|/(|x| + |y|))\n",
" relative_error = np.abs(backprop_gradient - estimated_gradient)/(np.abs(backprop_gradient) + np.abs(estimated_gradient))\n",
" # If the error is to large fail the gradient check\n",
" if relative_error > error_threshold:\n",
" print \"Gradient Check ERROR: parameter=%s ix=%s\" % (pname, ix)\n",
" print \"+h Loss: %f\" % gradplus\n",
" print \"-h Loss: %f\" % gradminus\n",
" print \"Estimated_gradient: %f\" % estimated_gradient\n",
" print \"Backpropagation gradient: %f\" % backprop_gradient\n",
" print \"Relative Error: %f\" % relative_error\n",
" return \n",
" it.iternext()\n",
" print \"Gradient check for parameter %s passed.\" % (pname)\n",
"RNNPy.gradient_check = gradient_check\n",
"# To avoid performing millions of expensive calculations we use a smaller vocabulary size for checking.\n",
"grad_check_vocab_size = 100\n",
"model = RNNPy(grad_check_vocab_size, 10, bptt_truncate=1000)\n",
"model.gradient_check([0,1,2,3], [1,2,3,4])"
"execution_count": 0,
"outputs": [
"output_type": "stream",
"text": [
"Performing gradient check for parameter U with size 1000.\n",
"Gradient check for parameter U passed.\n",
"Performing gradient check for parameter V with size 1000.\n"
"name": "stdout"
"output_type": "stream",
"text": [
"/usr/local/lib/python2.7/dist-packages/ RuntimeWarning: divide by zero encountered in log\n",
" if __name__ == '__main__':\n",
"/usr/local/lib/python2.7/dist-packages/ RuntimeWarning: invalid value encountered in double_scalars\n"
"name": "stderr"
"output_type": "stream",
"text": [
"Gradient check for parameter V passed.\n",
"Performing gradient check for parameter W with size 100.\n",
"Gradient check for parameter W passed.\n"
"name": "stdout"
"cell_type": "markdown",
"metadata": {
"id": "BLVi8OJIe2j4",
"colab_type": "text"
"source": [
"## Stochastic gradient descent implementation"
"cell_type": "code",
"metadata": {
"id": "P190PPNQerJV",
"colab_type": "code",
"colab": {}
"source": [
"# Performs one step of SGD.\n",
"def numpy_sdg_step(self, x, y, learning_rate):\n",
" # Calculate the gradients\n",
" dLdU, dLdV, dLdW = self.bptt(x, y)\n",
" # Change parameters according to gradients and learning rate\n",
" self.U -= learning_rate * dLdU\n",
" self.V -= learning_rate * dLdV\n",
" self.W -= learning_rate * dLdW\n",
"RNNPy.sgd_step = numpy_sdg_step"
"execution_count": 0,
"outputs": []
"cell_type": "code",
"metadata": {
"id": "D53uJ_gee8oe",
"colab_type": "code",
"colab": {}
"source": [
"# Outer SGD Loop\n",
"# - model: The RNN model instance\n",
"# - X_train: The training data set\n",
"# - y_train: The training data labels\n",
"# - learning_rate: Initial learning rate for SGD\n",
"# - nepoch: Number of times to iterate through the complete dataset\n",
"# - evaluate_loss_after: Evaluate the loss after this many epochs\n",
"def train_with_sgd(model, X_train, y_train, learning_rate=0.005, nepoch=100, evaluate_loss_after=5):\n",
" # We keep track of the losses so we can plot them later\n",
" losses = []\n",
" num_examples_seen = 0\n",
" for epoch in range(nepoch):\n",
" # Optionally evaluate the loss\n",
" if (epoch % evaluate_loss_after == 0):\n",
" loss = model.calculate_loss(X_train, y_train)\n",
" losses.append((num_examples_seen, loss))\n",
" time ='%Y-%m-%d %H:%M:%S')\n",
" print \"%s: Loss after num_examples_seen=%d epoch=%d: %f\" % (time, num_examples_seen, epoch, loss)\n",
" # Adjust the learning rate if loss increases\n",
" if (len(losses) > 1 and losses[-1][1] > losses[-2][1]):\n",
" learning_rate = learning_rate * 0.5 \n",
" print \"Setting learning rate to %f\" % learning_rate\n",
" sys.stdout.flush()\n",
" # For each training example...\n",
" for i in range(len(y_train)):\n",
" # One SGD step\n",
" model.sgd_step(X_train[i], y_train[i], learning_rate)\n",
" num_examples_seen += 1"
"execution_count": 0,
"outputs": []
"cell_type": "code",
"metadata": {
"id": "tLw_nLKQfABy",
"colab_type": "code",
"outputId": "f89d5c20-2ad8-4e76-8c82-e53cfe87e9e4",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
"source": [
"model = RNNPy(vocabulary_size)\n",
"%timeit model.sgd_step(X_train[10], y_train[10], 0.005)"
"execution_count": 0,
"outputs": [
"output_type": "stream",
"text": [
"10 loops, best of 3: 153 ms per loop\n"
"name": "stdout"
"cell_type": "code",
"metadata": {
"id": "99E8hg4XfFbR",
"colab_type": "code",
"outputId": "9aa679f9-0056-4544-ee5c-3cb5e06c58e2",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 272
"source": [
"# Train on a small subset of the data to see what happens\n",
"model = RNNPy(vocabulary_size)\n",
"losses = train_with_sgd(model, X_train[:100], y_train[:100], nepoch=10, evaluate_loss_after=1)"
"execution_count": 0,
"outputs": [
"output_type": "stream",
"text": [
"/usr/local/lib/python2.7/dist-packages/ DeprecationWarning: Calling np.sum(generator) is deprecated, and in the future will give a different result. Use np.sum(np.fromiter(generator)) or the python sum builtin instead.\n",
" from ipykernel import kernelapp as app\n",
"/usr/local/lib/python2.7/dist-packages/ RuntimeWarning: divide by zero encountered in log\n",
" if __name__ == '__main__':\n"
"name": "stderr"
"output_type": "stream",
"text": [
"2020-01-15 19:38:44: Loss after num_examples_seen=0 epoch=0: inf\n",
"2020-01-15 19:38:52: Loss after num_examples_seen=100 epoch=1: inf\n",
"2020-01-15 19:39:00: Loss after num_examples_seen=200 epoch=2: inf\n",
"2020-01-15 19:39:09: Loss after num_examples_seen=300 epoch=3: inf\n",
"2020-01-15 19:39:17: Loss after num_examples_seen=400 epoch=4: inf\n",
"2020-01-15 19:39:25: Loss after num_examples_seen=500 epoch=5: inf\n",
"2020-01-15 19:39:33: Loss after num_examples_seen=600 epoch=6: inf\n",
"2020-01-15 19:39:42: Loss after num_examples_seen=700 epoch=7: inf\n",
"2020-01-15 19:39:50: Loss after num_examples_seen=800 epoch=8: inf\n",
"2020-01-15 19:39:58: Loss after num_examples_seen=900 epoch=9: inf\n"
"name": "stdout"
"cell_type": "code",
"metadata": {
"id": "GkwgC04jtupa",
"colab_type": "code",
"colab": {}
"source": [
"execution_count": 0,
"outputs": []
Copy link

An effort to build an RNN from scratch!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment