Skip to content

Instantly share code, notes, and snippets.

@chck
Created March 14, 2018 17:37
Show Gist options
  • Save chck/8360c2e114683e5e42c852b114fbeeba to your computer and use it in GitHub Desktop.
Save chck/8360c2e114683e5e42c852b114fbeeba to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"Reference: https://github.com/keras-team/keras/blob/master/examples/imdb_cnn.py\n",
"\"\"\"\n",
"\n",
"import os\n",
"\n",
"# Configure TensorFlow's native log threshold BEFORE TensorFlow is imported.\n",
"# Assigning it after the imports (as this cell previously did) leaves any\n",
"# import-time native logging unaffected.\n",
"os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'\n",
"\n",
"from tensorflow.python.keras.preprocessing.sequence import pad_sequences\n",
"from tensorflow.python.keras.models import Sequential\n",
"from tensorflow.python.keras.layers import Dense, Dropout, Activation, Embedding, Conv1D, GlobalMaxPool1D, InputLayer\n",
"# The dataset loader comes from standalone Keras rather than tensorflow.python.keras;\n",
"# see the linked issue for the reason.\n",
"from keras.datasets import imdb  # https://github.com/tensorflow/tensorflow/issues/16358\n",
"# from keras.datasets import reuters"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((25000,), (25000,))"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"Load the IMDB data set as a training split and a validation split.\n",
"\"\"\"\n",
"\n",
"# The loader hands back two (sequences, labels) pairs. num_words=5000 is the same\n",
"# value the model's Embedding layer uses as input_dim, so keep the two in sync.\n",
"imdb_splits = imdb.load_data(num_words=5000)\n",
"(X_train, y_train), (X_val, y_val) = imdb_splits\n",
"\n",
"# Alternative data set (requires the commented-out reuters import):\n",
"# (X_train, y_train), (X_val, y_val) = reuters.load_data(num_words=5000)\n",
"\n",
"# Show how many samples each split holds.\n",
"X_train.shape, X_val.shape"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((25000, 400), (25000, 400))"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"Preprocess: give every sequence the same length (maxlen=400).\n",
"\"\"\"\n",
"\n",
"# Both splits go through the identical pad_sequences call, training split first.\n",
"X_train, X_val = (\n",
"    pad_sequences(split, maxlen=400)\n",
"    for split in (X_train, X_val)\n",
")\n",
"\n",
"# Both arrays should now report a second dimension of 400.\n",
"X_train.shape, X_val.shape"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"input_9 (InputLayer) (None, 400) 0 \n",
"_________________________________________________________________\n",
"embedding_9 (Embedding) (None, 400, 50) 250000 \n",
"_________________________________________________________________\n",
"dropout_16 (Dropout) (None, 400, 50) 0 \n",
"_________________________________________________________________\n",
"conv1d_9 (Conv1D) (None, 398, 250) 37750 \n",
"_________________________________________________________________\n",
"global_max_pooling1d_9 (Glob (None, 250) 0 \n",
"_________________________________________________________________\n",
"dense_16 (Dense) (None, 250) 62750 \n",
"_________________________________________________________________\n",
"dropout_17 (Dropout) (None, 250) 0 \n",
"_________________________________________________________________\n",
"dense_17 (Dense) (None, 1) 251 \n",
"=================================================================\n",
"Total params: 350,751\n",
"Trainable params: 350,751\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"# Binary text classifier: embedding -> 1D convolution -> global max pooling -> dense head.\n",
"# Layers are appended in the order the data flows through them.\n",
"model = Sequential()\n",
"\n",
"# Input: one row of 400 token ids per sample (matches maxlen used when padding).\n",
"model.add(InputLayer(input_shape=(400,)))\n",
"\n",
"# input_dim=5000 matches num_words passed to the data loader.\n",
"model.add(Embedding(input_dim=5000, output_dim=50))\n",
"model.add(Dropout(rate=0.2))\n",
"\n",
"# Convolve over the sequence axis, then keep the maximum response of each filter.\n",
"model.add(Conv1D(filters=250, kernel_size=3, activation='relu'))\n",
"model.add(GlobalMaxPool1D())\n",
"\n",
"# Hidden dense layer followed by a single sigmoid unit for the binary label.\n",
"model.add(Dense(units=250, activation='relu'))\n",
"model.add(Dropout(rate=0.2))\n",
"model.add(Dense(units=1, activation='sigmoid'))\n",
"\n",
"model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
"\n",
"# Print the per-layer output shapes and parameter counts.\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 25000 samples, validate on 25000 samples\n",
"Epoch 1/10\n",
"25000/25000 [==============================]25000/25000 [==============================] - 271s 11ms/step - loss: 0.6829 - acc: 0.5407 - val_loss: 0.6249 - val_acc: 0.6465\n",
"\n",
"Epoch 2/10\n",
"25000/25000 [==============================]25000/25000 [==============================] - 289s 12ms/step - loss: 0.4901 - acc: 0.7613 - val_loss: 0.4464 - val_acc: 0.7880\n",
"\n",
"Epoch 3/10\n",
"25000/25000 [==============================]25000/25000 [==============================] - 274s 11ms/step - loss: 0.3890 - acc: 0.8243 - val_loss: 0.3674 - val_acc: 0.8386\n",
"\n",
"Epoch 4/10\n",
"25000/25000 [==============================]25000/25000 [==============================] - 285s 11ms/step - loss: 0.3357 - acc: 0.8538 - val_loss: 0.3220 - val_acc: 0.8614\n",
"\n",
"Epoch 5/10\n",
"25000/25000 [==============================]25000/25000 [==============================] - 281s 11ms/step - loss: 0.2968 - acc: 0.8737 - val_loss: 0.2940 - val_acc: 0.8774\n",
"\n",
"Epoch 6/10\n",
"25000/25000 [==============================]25000/25000 [==============================] - 288s 12ms/step - loss: 0.2674 - acc: 0.8884 - val_loss: 0.3458 - val_acc: 0.8475\n",
"\n",
"Epoch 7/10\n",
"25000/25000 [==============================]25000/25000 [==============================] - 275s 11ms/step - loss: 0.2426 - acc: 0.8998 - val_loss: 0.2805 - val_acc: 0.8834\n",
"\n",
"Epoch 8/10\n",
"25000/25000 [==============================]25000/25000 [==============================] - 287s 11ms/step - loss: 0.2269 - acc: 0.9077 - val_loss: 0.2982 - val_acc: 0.8781\n",
"\n",
"Epoch 9/10\n",
"25000/25000 [==============================]25000/25000 [==============================] - 288s 12ms/step - loss: 0.2078 - acc: 0.9170 - val_loss: 0.2795 - val_acc: 0.8868\n",
"\n",
"Epoch 10/10\n",
"25000/25000 [==============================]25000/25000 [==============================] - 283s 11ms/step - loss: 0.2009 - acc: 0.9193 - val_loss: 0.2896 - val_acc: 0.8776\n",
"\n"
]
}
],
"source": [
"# Train for 10 epochs in mini-batches of 32, evaluating on the validation split\n",
"# during training. The return value is kept in `stack`; its `.history` attribute\n",
"# is what the plotting cells read.\n",
"stack = model.fit(\n",
"    x=X_train,\n",
"    y=y_train,\n",
"    validation_data=(X_val, y_val),\n",
"    epochs=10,\n",
"    batch_size=32,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x1335f1518>"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x1335f14a8>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%matplotlib inline\n",
"import pandas as pd\n",
"\n",
"# Training vs. validation loss. Each row of the history frame is one epoch, so the\n",
"# default integer index on the x-axis is the 0-based epoch number.\n",
"loss_ax = pd.DataFrame(stack.history)[['loss', 'val_loss']].plot(\n",
"    title='Training vs. validation loss',\n",
")\n",
"# Label the axes so the figure stands on its own; the trailing ';' keeps the\n",
"# return value of set() out of the cell output.\n",
"loss_ax.set(xlabel='epoch', ylabel='loss (binary cross-entropy)');"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x1335f1748>"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x12727cef0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Training vs. validation accuracy, one point per epoch (0-based index on the x-axis).\n",
"acc_ax = pd.DataFrame(stack.history)[['acc', 'val_acc']].plot(\n",
"    title='Training vs. validation accuracy',\n",
")\n",
"# Label the axes so the figure stands on its own; the trailing ';' keeps the\n",
"# return value of set() out of the cell output.\n",
"acc_ax.set(xlabel='epoch', ylabel='accuracy');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment