Skip to content

Instantly share code, notes, and snippets.

@dsblank
Created March 8, 2018 15:44
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dsblank/8027fb28b59057cb62dd1095ba22fbdf to your computer and use it in GitHub Desktop.
Save dsblank/8027fb28b59057cb62dd1095ba22fbdf to your computer and use it in GitHub Desktop.
Working code for Morse Code decoder
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Working Morse Code\n",
"\n",
"Based on https://towardsdatascience.com/cracking-morse-code-with-rnns-e5883355a6f3\n",
"\n",
"Fixed bugs and made a couple of things clearer."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/dblank/.local/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
" from ._conv import register_converters as _register_converters\n",
"Using TensorFlow backend.\n"
]
}
],
"source": [
"import random\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from keras.models import Sequential\n",
"from keras import layers"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Build the letter -> Morse code lookup table (lowercase a-z only)\n",
"alphabet = list(\"abcdefghijklmnopqrstuvwxyz\")\n",
"values = [\n",
"    '.-', '-...', '-.-.', '-..', '.', '..-.', '--.',     # a-g\n",
"    '....', '..', '.---', '-.-', '.-..', '--', '-.',     # h-n\n",
"    '---', '.--.', '--.-', '.-.', '...', '-', '..-',     # o-u\n",
"    '...-', '.--', '-..-', '-.--', '--..',               # v-z\n",
"]\n",
"morse_dict = {letter: code for letter, code in zip(alphabet, values)}\n",
"def morse_encode(word):\n",
"    \"\"\"Return the Morse code of `word` with the letters separated by '*'.\n",
"\n",
"    Whitespace inside `word` is dropped; any other character that is not a\n",
"    key of `morse_dict` raises KeyError.\n",
"    \"\"\"\n",
"    letters = (ch for ch in word if not ch.isspace())\n",
"    return \"*\".join(morse_dict[ch] for ch in letters)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Number of letters in the words the model is trained on\n",
"word_len = 9\n",
"# Target length: one output character per letter\n",
"max_len_y = word_len\n",
"# Input length: at most 4 Morse symbols per letter, plus one '*'\n",
"# separator between each pair of adjacent letters\n",
"max_len_x = 4 * word_len + (word_len - 1)\n",
"def data_gen(n, path='words_alpha.txt', seed=None):\n",
"    \"\"\"Load all `n`-character words and build padded inputs and targets.\n",
"\n",
"    Args:\n",
"        n: keep only the words with exactly this many characters.\n",
"        path: newline-separated word list to read.\n",
"        seed: if given, shuffle with a private `random.Random(seed)` so the\n",
"            order is reproducible; by default the global `random` state is\n",
"            used.\n",
"\n",
"    Returns:\n",
"        (output_list, input_list) in the same shuffled order: the words\n",
"        right-justified with spaces to `max_len_y`, and their Morse\n",
"        encodings left-justified with spaces to `max_len_x`.\n",
"    \"\"\"\n",
"    with open(path, 'r') as f:\n",
"        all_words = f.read().lower().split('\\n')\n",
"    words = [word for word in all_words if len(word) == n]\n",
"\n",
"    # Shuffle the list since the words are ordered\n",
"    shuffle = random.shuffle if seed is None else random.Random(seed).shuffle\n",
"    shuffle(words)\n",
"\n",
"    # Targets are padded on the left, inputs on the right\n",
"    output_list = [word.rjust(max_len_y) for word in words]\n",
"    # Each word is encoded exactly once; ljust appends the padding spaces\n",
"    input_list = [morse_encode(word).ljust(max_len_x) for word in words]\n",
"\n",
"    return output_list, input_list\n",
"# Use word_len rather than a literal so the word filter cannot drift away\n",
"# from the padding lengths derived from it\n",
"output_list, input_list = data_gen(word_len)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"class CharTable(object):\n",
"    \"\"\"One-hot encoder/decoder for a fixed set of characters.\n",
"\n",
"    Characters are indexed in sorted order.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, chars):\n",
"        self.chars = sorted(set(chars))\n",
"        self.char_indices = {c: i for i, c in enumerate(self.chars)}\n",
"        self.indices_char = dict(enumerate(self.chars))\n",
"\n",
"    def encode(self, token, num_rows):\n",
"        \"\"\"Return a (num_rows, len(self.chars)) one-hot matrix for `token`.\n",
"\n",
"        Row i encodes token[i]; rows past the end of `token` stay all-zero.\n",
"        Raises KeyError for a character that is not in the table.\n",
"        \"\"\"\n",
"        x = np.zeros((num_rows, len(self.chars)))\n",
"        for i, c in enumerate(token):\n",
"            x[i, self.char_indices[c]] = 1\n",
"        return x\n",
"\n",
"    def decode(self, x, calc_argmax=True):\n",
"        \"\"\"Turn one-hot/probability rows, or plain indices, back into text.\n",
"\n",
"        With calc_argmax=True, `x` is either a single row (one character is\n",
"        returned) or a 2-D matrix with one row per character. With\n",
"        calc_argmax=False, `x` must be an iterable of integer indices.\n",
"        \"\"\"\n",
"        if calc_argmax:\n",
"            # argmax of a single row is a scalar; atleast_1d makes both the\n",
"            # 1-D and the 2-D case iterable\n",
"            x = np.atleast_1d(x.argmax(axis=-1))\n",
"        return ''.join(self.indices_char[int(v)] for v in x)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Both alphabets include the space character, which is used for padding.\n",
"# Input symbols: letter separator, dash, dot, padding\n",
"chars_in = '*-. '\n",
"# Output symbols: the lowercase letters plus padding\n",
"chars_out = 'abcdefghijklmnopqrstuvwxyz '\n",
"ctable_in, ctable_out = CharTable(chars_in), CharTable(chars_out)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Inputs: one one-hot matrix per padded Morse string\n",
"x = np.zeros((len(input_list), max_len_x, len(chars_in)))\n",
"for i, token in enumerate(input_list):\n",
"    x[i, :, :] = ctable_in.encode(token, max_len_x)\n",
"\n",
"# Targets: one one-hot matrix per padded word\n",
"y = np.zeros((len(output_list), max_len_y, len(chars_out)))\n",
"for i, token in enumerate(output_list):\n",
"    y[i, :, :] = ctable_out.encode(token, max_len_y)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Train on the first three quarters of the (already shuffled) samples and\n",
"# hold out the last quarter for validation. The previous len(x) // 4 split\n",
"# trained on 25% and validated on 75%.\n",
"m = 3 * len(x) // 4\n",
"(x_train, x_val) = x[:m], x[m:]\n",
"(y_train, y_val) = y[:m], y[m:]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(13350, 44, 4)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (number of training samples, max_len_x, len(chars_in))\n",
"x_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--*.-*.-.*--.*.*.-..*..*-.*. margeline\n",
".*.--.*..*--.*.-*...*-*.*.-. epigaster\n",
"---*...-*.*.-.*-...*..-*..*.-..*-.. overbuild\n",
".-..*.-*.-.*-.--*-.*--.*.*.-*.-.. laryngeal\n",
"--*.-*..-*.-.*.*...*--.-*..-*. mauresque\n",
"-.-.*---*--.*-.*..*-*..*...-*. cognitive\n",
"--*..*-.-.*.-.*---*-...*---*-..*-.-- microbody\n",
"-.-.*.-*-.*.-*-.*.-*.*.-*-. cananaean\n",
".-*..-.*-*.*.-.*-.-*..*-.*--. afterking\n",
"--*..*...*-*....*.-.*---*.--*-. misthrown\n"
]
}
],
"source": [
"# Show the first ten training pairs: decoded Morse input, then target word\n",
"for i in range(10):\n",
"    morse = \"\".join(ctable_in.decode(code) for code in x_train[i])\n",
"    word = \"\".join(ctable_out.decode(code) for code in y_train[i])\n",
"    print(morse, word)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Number of units in each of the two LSTM layers of the model\n",
"latent_dim = 256"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"input_1 (InputLayer) (None, 44, 4) 0 \n",
"_________________________________________________________________\n",
"lstm_1 (LSTM) (None, 256) 267264 \n",
"_________________________________________________________________\n",
"repeat_vector_1 (RepeatVecto (None, 9, 256) 0 \n",
"_________________________________________________________________\n",
"lstm_2 (LSTM) (None, 9, 256) 525312 \n",
"_________________________________________________________________\n",
"time_distributed_1 (TimeDist (None, 9, 27) 6939 \n",
"_________________________________________________________________\n",
"activation_1 (Activation) (None, 9, 27) 0 \n",
"=================================================================\n",
"Total params: 799,515\n",
"Trainable params: 799,515\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"# Encoder-decoder: the first LSTM reduces the Morse sequence to a single\n",
"# vector, which is repeated max_len_y times and fed to the second LSTM;\n",
"# a per-timestep dense layer + softmax then scores every output character.\n",
"model = Sequential([\n",
"    layers.InputLayer((max_len_x, len(chars_in))),\n",
"    layers.LSTM(latent_dim),\n",
"    layers.RepeatVector(max_len_y),\n",
"    layers.LSTM(latent_dim, return_sequences=True),\n",
"    layers.TimeDistributed(layers.Dense(len(chars_out))),\n",
"    layers.Activation('softmax'),\n",
"])\n",
"model.compile(\n",
"    loss='categorical_crossentropy',\n",
"    optimizer='adam',\n",
"    metrics=['accuracy'],\n",
")\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Training hyper-parameters passed to model.fit\n",
"# NOTE(review): the saved output of the training cell shows \"Epoch 1/1\",\n",
"# so it was not produced with this Epochs value - re-run before relying on it\n",
"Epochs = 120\n",
"Batch_size = 1024"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 13350 samples, validate on 40053 samples\n",
"Epoch 1/1\n",
"13350/13350 [==============================] - 69s 5ms/step - loss: 3.0553 - acc: 0.0960 - val_loss: 2.9118 - val_acc: 0.1142\n"
]
}
],
"source": [
"# Train the model and keep the per-epoch metrics for plotting\n",
"hist = model.fit(\n",
"    x_train,\n",
"    y_train,\n",
"    batch_size=Batch_size,\n",
"    epochs=Epochs,\n",
"    validation_data=(x_val, y_val),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fe35c69af60>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Older Keras records the accuracy metric as 'acc'; Keras >= 2.3 reports it\n",
"# under the name given to compile(), i.e. 'accuracy'. Accept either.\n",
"acc_key = 'acc' if 'acc' in hist.history else 'accuracy'\n",
"\n",
"# One panel per metric: (history key, axis label, legend position)\n",
"panels = [\n",
"    (acc_key, 'accuracy', 'upper left'),\n",
"    ('loss', 'loss', 'upper right'),\n",
"]\n",
"fig, axes = plt.subplots(1, 2, figsize=(20, 5))\n",
"for ax, (key, label, legend_loc) in zip(axes, panels):\n",
"    ax.plot(hist.history[key])\n",
"    ax.plot(hist.history['val_' + key])\n",
"    ax.set_title('model ' + label)\n",
"    ax.set_ylabel(label)\n",
"    ax.set_xlabel('epoch')\n",
"    ax.legend(['train', 'validation'], loc=legend_loc)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment