Skip to content

Instantly share code, notes, and snippets.

@shanecandoit
Created September 16, 2019 18:09
Show Gist options
  • Save shanecandoit/53d0841f425472425a48a8f7435c1baf to your computer and use it in GitHub Desktop.
Save shanecandoit/53d0841f425472425a48a8f7435c1baf to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Classify IMDB movie reviews by sentiment"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"from keras.datasets import imdb"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(((25000,), (25000,)), ((25000,), (25000,)))"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10_000)\n",
"(train_data.shape, train_labels.shape), (test_data.shape, test_labels.shape)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[1,\n",
" 14,\n",
" 22,\n",
" 16,\n",
" 43,\n",
" 530,\n",
" 973,\n",
" 1622,\n",
" 1385,\n",
" 65,\n",
" 458,\n",
" 4468,\n",
" 66,\n",
" 3941,\n",
" 4,\n",
" 173,\n",
" 36,\n",
" 256,\n",
" 5,\n",
" 25,\n",
" 100,\n",
" 43,\n",
" 838,\n",
" 112,\n",
" 50,\n",
" 670,\n",
" 2,\n",
" 9,\n",
" 35,\n",
" 480,\n",
" 284,\n",
" 5,\n",
" 150,\n",
" 4,\n",
" 172,\n",
" 112,\n",
" 167,\n",
" 2,\n",
" 336,\n",
" 385,\n",
" 39,\n",
" 4,\n",
" 172,\n",
" 4536,\n",
" 1111,\n",
" 17,\n",
" 546,\n",
" 38,\n",
" 13,\n",
" 447,\n",
" 4,\n",
" 192,\n",
" 50,\n",
" 16,\n",
" 6,\n",
" 147,\n",
" 2025,\n",
" 19,\n",
" 14,\n",
" 22,\n",
" 4,\n",
" 1920,\n",
" 4613,\n",
" 469,\n",
" 4,\n",
" 22,\n",
" 71,\n",
" 87,\n",
" 12,\n",
" 16,\n",
" 43,\n",
" 530,\n",
" 38,\n",
" 76,\n",
" 15,\n",
" 13,\n",
" 1247,\n",
" 4,\n",
" 22,\n",
" 17,\n",
" 515,\n",
" 17,\n",
" 12,\n",
" 16,\n",
" 626,\n",
" 18,\n",
" 2,\n",
" 5,\n",
" 62,\n",
" 386,\n",
" 12,\n",
" 8,\n",
" 316,\n",
" 8,\n",
" 106,\n",
" 5,\n",
" 4,\n",
" 2223,\n",
" 5244,\n",
" 16,\n",
" 480,\n",
" 66,\n",
" 3785,\n",
" 33,\n",
" 4,\n",
" 130,\n",
" 12,\n",
" 16,\n",
" 38,\n",
" 619,\n",
" 5,\n",
" 25,\n",
" 124,\n",
" 51,\n",
" 36,\n",
" 135,\n",
" 48,\n",
" 25,\n",
" 1415,\n",
" 33,\n",
" 6,\n",
" 22,\n",
" 12,\n",
" 215,\n",
" 28,\n",
" 77,\n",
" 52,\n",
" 5,\n",
" 14,\n",
" 407,\n",
" 16,\n",
" 82,\n",
" 2,\n",
" 8,\n",
" 4,\n",
" 107,\n",
" 117,\n",
" 5952,\n",
" 15,\n",
" 256,\n",
" 4,\n",
" 2,\n",
" 7,\n",
" 3766,\n",
" 5,\n",
" 723,\n",
" 36,\n",
" 71,\n",
" 43,\n",
" 530,\n",
" 476,\n",
" 26,\n",
" 400,\n",
" 317,\n",
" 46,\n",
" 7,\n",
" 4,\n",
" 2,\n",
" 1029,\n",
" 13,\n",
" 104,\n",
" 88,\n",
" 4,\n",
" 381,\n",
" 15,\n",
" 297,\n",
" 98,\n",
" 32,\n",
" 2071,\n",
" 56,\n",
" 26,\n",
" 141,\n",
" 6,\n",
" 194,\n",
" 7486,\n",
" 18,\n",
" 4,\n",
" 226,\n",
" 22,\n",
" 21,\n",
" 134,\n",
" 476,\n",
" 26,\n",
" 480,\n",
" 5,\n",
" 144,\n",
" 30,\n",
" 5535,\n",
" 18,\n",
" 51,\n",
" 36,\n",
" 28,\n",
" 224,\n",
" 92,\n",
" 25,\n",
" 104,\n",
" 4,\n",
" 226,\n",
" 65,\n",
" 16,\n",
" 38,\n",
" 1334,\n",
" 88,\n",
" 12,\n",
" 16,\n",
" 283,\n",
" 5,\n",
" 16,\n",
" 4472,\n",
" 113,\n",
" 103,\n",
" 32,\n",
" 15,\n",
" 16,\n",
" 5345,\n",
" 19,\n",
" 178,\n",
" 32]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_data[0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_labels[0]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"9999"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# only the top 10,000 most frequent words are kept (num_words=10_000), so the largest word index is 9999\n",
"\n",
"max([max(seq) for seq in train_data])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
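"# decode a review back to text; indices are offset by 3 because 0, 1 and 2 are reserved (padding, start of sequence, unknown)\n",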
"word_index = imdb.get_word_index()\n",
"rev_word_index = dict([(val, key) for (key, val) in word_index.items()])\n",
"def decode(review_id):\n",
" rev = ' '.join([rev_word_index.get(i - 3, '?') for i in train_data[review_id]])\n",
" return rev"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 ? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all\n",
"1 ? big hair big boobs bad music and a giant safety pin these are the words to best describe this terrible movie i love cheesy horror movies and i've seen hundreds but this had got to be on of the worst ever made the plot is paper thin and ridiculous the acting is an abomination the script is completely laughable the best is the end showdown with the cop and how he worked out who the killer is it's just so damn terribly written the clothes are sickening and funny in equal ? the hair is big lots of boobs ? men wear those cut ? shirts that show off their ? sickening that men actually wore them and the music is just ? trash that plays over and over again in almost every scene there is trashy music boobs and ? taking away bodies and the gym still doesn't close for ? all joking aside this is a truly bad film whose only charm is to look back on the disaster that was the 80's and have a good old laugh at how bad everything was back then\n",
"2 ? this has to be one of the worst films of the 1990s when my friends i were watching this film being the target audience it was aimed at we just sat watched the first half an hour with our jaws touching the floor at how bad it really was the rest of the time everyone else in the theatre just started talking to each other leaving or generally crying into their popcorn that they actually paid money they had ? working to watch this feeble excuse for a film it must have looked like a great idea on paper but on film it looks like no one in the film has a clue what is going on crap acting crap costumes i can't get across how ? this is to watch save yourself an hour a bit of your life\n",
"3 ? the ? ? at storytelling the traditional sort many years after the event i can still see in my ? eye an elderly lady my friend's mother retelling the battle of ? she makes the characters come alive her passion is that of an eye witness one to the events on the ? heath a mile or so from where she lives br br of course it happened many years before she was born but you wouldn't guess from the way she tells it the same story is told in bars the length and ? of scotland as i discussed it with a friend one night in ? a local cut in to give his version the discussion continued to closing time br br stories passed down like this become part of our being who doesn't remember the stories our parents told us when we were children they become our invisible world and as we grow older they maybe still serve as inspiration or as an emotional ? fact and fiction blend with ? role models warning stories ? magic and mystery br br my name is ? like my grandfather and his grandfather before him our protagonist introduces himself to us and also introduces the story that stretches back through generations it produces stories within stories stories that evoke the ? wonder of scotland its rugged mountains ? in ? the stuff of legend yet ? is ? in reality this is what gives it its special charm it has a rough beauty and authenticity ? with some of the finest ? singing you will ever hear br br ? ? visits his grandfather in hospital shortly before his death he burns with frustration part of him ? to be in the twenty first century to hang out in ? but he is raised on the western ? among a ? speaking community br br yet there is a deeper conflict within him he ? to know the truth the truth behind his ? ancient stories where does fiction end and he wants to know the truth behind the death of his parents br br he is pulled to make a last ? journey to the ? of one of ? most ? mountains can the truth be told or is it all in stories br br in this story about stories we ? bloody battles ? 
lovers the ? of old and the sometimes more ? ? of accepted truth in doing so we each connect with ? as he lives the story of his own life br br ? the ? ? is probably the most honest ? and genuinely beautiful film of scotland ever made like ? i got slightly annoyed with the ? of hanging stories on more stories but also like ? i ? this once i saw the ? picture ' forget the box office ? of braveheart and its like you might even ? the ? famous ? of the wicker man to see a film that is true to scotland this one is probably unique if you maybe ? on it deeply enough you might even re ? the power of storytelling and the age old question of whether there are some truths that cannot be told but only experienced\n",
"4 ? worst mistake of my life br br i picked this movie up at target for 5 because i figured hey it's sandler i can get some cheap laughs i was wrong completely wrong mid way through the film all three of my friends were asleep and i was still suffering worst plot worst script worst movie i have ever seen i wanted to hit my head up against a wall for an hour then i'd stop and you know why because it felt damn good upon bashing my head in i stuck that damn movie in the ? and watched it burn and that felt better than anything else i've ever done it took american psycho army of darkness and kill bill just to get over that crap i hate you sandler for actually going through with this and ruining a whole day of my life\n"
]
}
],
"source": [
"for i in range(5):\n",
" print(i, decode(i))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# encode each variable-length review as a fixed-size multi-hot vector to feed to the neural network"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def vectorize_sequences(seqs, dims=10_000):\n",
" results = np.zeros((len(seqs), dims))\n",
" for i, seq in enumerate(seqs):\n",
" results[i, seq] = 1.\n",
" return results"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"x_train, x_test = vectorize_sequences(train_data), vectorize_sequences(test_data)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((25000, 10000), (25000, 10000))"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x_train.shape, x_test.shape"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0., 1., 1., ..., 0., 0., 0.])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x_train[0]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"y_train = np.asarray(train_labels).astype('float32')\n",
"y_test = np.asarray(test_labels).astype('float32')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# model"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"from keras import models\n",
"from keras import layers"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"model = models.Sequential()\n",
"model.add(layers.Dense(16, activation='relu', input_shape=(10_000,)))\n",
"model.add(layers.Dense(64, activation='relu'))\n",
"model.add(layers.Dense(32, activation='relu'))\n",
"model.add(layers.Dense(1, activation='sigmoid'))"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"model.compile(optimizer='adam',\n",
" loss='binary_crossentropy',\n",
" metrics=['acc'])"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"x_val = x_train[:10_000]\n",
"partial_x_train = x_train[10_000:]"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"y_val = y_train[:10_000]\n",
"partial_y_train = y_train[10_000:]"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 15000 samples, validate on 10000 samples\n",
"Epoch 1/4\n",
"15000/15000 [==============================] - 3s 203us/step - loss: 0.5285 - acc: 0.7785 - val_loss: 0.3335 - val_acc: 0.8691\n",
"Epoch 2/4\n",
"15000/15000 [==============================] - 1s 97us/step - loss: 0.2328 - acc: 0.9113 - val_loss: 0.2960 - val_acc: 0.8821\n",
"Epoch 3/4\n",
"15000/15000 [==============================] - 1s 95us/step - loss: 0.1396 - acc: 0.9525 - val_loss: 0.3189 - val_acc: 0.8796\n",
"Epoch 4/4\n",
"15000/15000 [==============================] - 1s 96us/step - loss: 0.0904 - acc: 0.9715 - val_loss: 0.3630 - val_acc: 0.8775\n"
]
}
],
"source": [
"history = model.fit(partial_x_train,\n",
" partial_y_train,\n",
" epochs= 4, # 20,\n",
" batch_size=512,\n",
" validation_data=(x_val, y_val))"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# val_loss: 0.6899 - val_acc: 0.8686\n",
"\n",
"history_dict = history.history\n",
"history_dict.keys()"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"# plot"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"loss_values = history_dict['loss']\n",
"val_loss_values = history_dict['val_loss']\n",
"\n",
"acc = history_dict['acc']\n",
"epochs = range(1,len(acc) + 1)\n",
"\n",
"plt.plot(epochs, loss_values, 'bo', label='Train loss')\n",
"plt.plot(epochs, val_loss_values, 'b', label='Validation loss')\n",
"plt.title('Train, Validation loss')\n",
"plt.xlabel('Epochs')\n",
"plt.ylabel('Loss')\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.clf()\n",
"\n",
"acc_values = acc\n",
"val_acc_values = history_dict['val_acc']\n",
"\n",
"plt.plot(epochs, acc, 'bo', label='Training acc')\n",
"plt.plot(epochs, val_acc_values, 'b', label='Validation acc')\n",
"plt.title('Train, Validation acc')\n",
"plt.xlabel('epochs')\n",
"plt.ylabel('acc')\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"25000/25000 [==============================] - 2s 67us/step\n"
]
}
],
"source": [
"results = model.evaluate(x_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0.3916580714178085, 0.86696]"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# [0.7482284175515175, 0.85512]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@shanecandoit
Copy link
Author

Validation accuracy peaks at 88.21% (epoch 2) and ends at 87.75% after epoch 4.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment