Skip to content

Instantly share code, notes, and snippets.

@ZhipengHe
Created May 3, 2024 06:14
Show Gist options
  • Save ZhipengHe/6daa3ee17381a0dc2388df0c40ba03e7 to your computer and use it in GitHub Desktop.
Save ZhipengHe/6daa3ee17381a0dc2388df0c40ba03e7 to your computer and use it in GitHub Desktop.
LSTM Autoencoder.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"toc_visible": true,
"gpuType": "T4",
"mount_file_id": "1g47n-xhphw8m5_nT9HMdnZ-p1xMCPtpX",
"authorship_tag": "ABX9TyMKh2u5UwNPInA65juJpENP",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/ZhipengHe/6daa3ee17381a0dc2388df0c40ba03e7/lstm-autoencoder.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"## Preprocessing data"
],
"metadata": {
"id": "p2zF-r8i2cwa"
}
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"\n",
"drive.mount('/content/drive')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yMGyX_vwyns8",
"outputId": "209faa12-b4dc-4181-bcff-ad72f4b7e2e1"
},
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "cfomMlZ8wqbL"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"sns.set(color_codes=True)\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"# Parameters\n",
"dataDir = '/content/drive/MyDrive/Data/ECG/'\n",
"trainData = f'{dataDir}train_data.npz'\n",
"testData = f'{dataDir}test_data.npz'\n",
"\n",
"FS = 300 # Sampling rate: 300 Hz\n",
"WINDOW_SIZE = 9*FS # 9-second window (2700 samples); upper bound for the multiplier is 60 seconds"
]
},
{
"cell_type": "code",
"source": [
"import tensorflow as tf\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import LSTM, RepeatVector, TimeDistributed, Dense, Flatten"
],
"metadata": {
"id": "SHkHpqWHxMLz"
},
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# List the GPUs TensorFlow can see (an empty list means no GPU is visible)\n",
"gpu_devices = tf.config.list_physical_devices('GPU')\n",
"print(gpu_devices)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "CE0DAGKZzB3X",
"outputId": "433e5930-cdfc-4c54-fa97-d1b35b556137"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Load data from .npz files.\n",
"# np.load returns a lazy NpzFile that keeps the archive open; the arrays are only read when\n",
"# indexed. Reading them inside a `with` block closes the file handle once they are in memory.\n",
"with np.load(trainData) as train_npz:\n",
"    train_names, train_data, train_labels = train_npz['name'], train_npz['data'], train_npz['label']\n",
"\n",
"with np.load(testData) as test_npz:\n",
"    test_names, test_data, test_labels = test_npz['name'], test_npz['data'], test_npz['label']"
],
"metadata": {
"id": "4Dimm8PmxNNL"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Data are preprocessed by:\n",
"```python\n",
"# Preprocess data\n",
"data = np.nan_to_num(data) # removing NaNs and Infs\n",
"data = data - np.mean(data)\n",
"data = data/np.std(data)\n",
"```"
],
"metadata": {
"id": "pVObTFhdEAAh"
}
},
{
"cell_type": "code",
"source": [
"# Truncate every recording to its first WINDOW_SIZE samples\n",
"X_train, X_test = train_data[:, :WINDOW_SIZE], test_data[:, :WINDOW_SIZE]\n",
"y_train, y_test = train_labels, test_labels\n",
"\n",
"# Binarise the labels: 'normal' -> 0, every other label -> 1 (anomaly).\n",
"# The training set is expected to contain only 'normal' recordings.\n",
"is_normal_train = y_train == 'normal'\n",
"is_normal_test = y_test == 'normal'\n",
"y_train_binary = np.where(is_normal_train, 0, 1)\n",
"y_test_binary = np.where(is_normal_test, 0, 1)\n"
],
"metadata": {
"id": "ylIi8K2eyI77"
},
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.preprocessing import MinMaxScaler\n",
"\n",
"# Fit the scaler on the training data only. Fitting on train + test leaks test-set\n",
"# statistics into preprocessing and lets anomalous test recordings define the scaling range.\n",
"# Scaled test values may therefore fall outside [0, 1]; that is expected.\n",
"# NOTE: MinMaxScaler scales each column independently, i.e. each sample index of the window.\n",
"scaler = MinMaxScaler()\n",
"scaler.fit(X_train)\n",
"X_train_scalered = scaler.transform(X_train)\n",
"X_test_scalered = scaler.transform(X_test)\n",
"\n",
"# Backward-compatible alias: this array used to be called y_train_scalered even though it\n",
"# holds the scaled *test inputs*, not training targets. Prefer X_test_scalered.\n",
"y_train_scalered = X_test_scalered\n"
],
"metadata": {
"id": "HSpxYCGz3Z9b"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Build Models"
],
"metadata": {
"id": "2XC_ZzI6ySeM"
}
},
{
"cell_type": "code",
"source": [
"# Model input is (timesteps, features): one value per time step\n",
"n_timesteps = X_train_scalered.shape[1]\n",
"input_shape = (n_timesteps, 1)"
],
"metadata": {
"id": "rJJtl6aZyPvU"
},
"execution_count": 8,
"outputs": []
},
{
"cell_type": "code",
"source": [
"input_shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7d40GMNW5H-p",
"outputId": "3ef652d6-7413-4b20-b866-8d766433c034"
},
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(2700, 1)"
]
},
"metadata": {},
"execution_count": 9
}
]
},
{
"cell_type": "code",
"source": [
"# LSTM autoencoder: the encoder compresses each sequence into a 64-dim vector, RepeatVector\n",
"# copies that vector to every time step, and the decoder reconstructs the sequence.\n",
"# The LSTM layers keep the default tanh activation: activation='relu' does not meet the\n",
"# cuDNN kernel criteria, so every layer fell back to the much slower generic GPU kernel.\n",
"model = Sequential()\n",
"model.add(tf.keras.Input(shape=input_shape))\n",
"model.add(LSTM(128, return_sequences=True))\n",
"model.add(LSTM(64, return_sequences=False))\n",
"model.add(RepeatVector(input_shape[0]))\n",
"model.add(LSTM(64, return_sequences=True))\n",
"model.add(LSTM(128, return_sequences=True))\n",
"model.add(TimeDistributed(Dense(input_shape[1])))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1SX09TlWyRuY",
"outputId": "389821d3-f5a0-4e14-e38b-81b8f783d89e"
},
"execution_count": 10,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Adam (learning rate 1e-3) minimising the mean absolute reconstruction error\n",
"opt = tf.keras.optimizers.Adam(learning_rate=0.001)\n",
"model.compile(loss='mae', optimizer=opt)\n",
"\n",
"model.summary()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "z8y9XRtLyVeN",
"outputId": "3be836e2-5ea6-4849-a749-4c18f3934040"
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Model: \"sequential\"\n",
"_________________________________________________________________\n",
" Layer (type) Output Shape Param # \n",
"=================================================================\n",
" lstm (LSTM) (None, 2700, 128) 66560 \n",
" \n",
" lstm_1 (LSTM) (None, 64) 49408 \n",
" \n",
" repeat_vector (RepeatVecto (None, 2700, 64) 0 \n",
" r) \n",
" \n",
" lstm_2 (LSTM) (None, 2700, 64) 33024 \n",
" \n",
" lstm_3 (LSTM) (None, 2700, 128) 98816 \n",
" \n",
" time_distributed (TimeDist (None, 2700, 1) 129 \n",
" ributed) \n",
" \n",
"=================================================================\n",
"Total params: 247937 (968.50 KB)\n",
"Trainable params: 247937 (968.50 KB)\n",
"Non-trainable params: 0 (0.00 Byte)\n",
"_________________________________________________________________\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Autoencoder training: the inputs double as the reconstruction targets.\n",
"# Only the first 100 recordings are used; 10% of them are held out for validation.\n",
"train_subset = X_train_scalered[:100]\n",
"fit_result = model.fit(\n",
"    train_subset,\n",
"    train_subset,\n",
"    epochs=20,\n",
"    batch_size=32,\n",
"    validation_split=0.1,\n",
")\n",
"history = fit_result.history"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8wOnvQasyWbV",
"outputId": "52929c25-2100-47e1-911d-db414cbb96a9"
},
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Epoch 1/20\n",
"3/3 [==============================] - 65s 16s/step - loss: 0.4781 - val_loss: 0.4389\n",
"Epoch 2/20\n",
"3/3 [==============================] - 50s 17s/step - loss: 0.4065 - val_loss: 0.3106\n",
"Epoch 3/20\n",
"3/3 [==============================] - 49s 17s/step - loss: 0.2230 - val_loss: 0.1606\n",
"Epoch 4/20\n",
"3/3 [==============================] - 47s 16s/step - loss: 0.1394 - val_loss: 0.0824\n",
"Epoch 5/20\n",
"3/3 [==============================] - 53s 17s/step - loss: 0.1014 - val_loss: 0.1171\n",
"Epoch 6/20\n",
"3/3 [==============================] - 49s 17s/step - loss: 0.1039 - val_loss: 0.0775\n",
"Epoch 7/20\n",
"3/3 [==============================] - 47s 16s/step - loss: 0.0887 - val_loss: 0.0919\n",
"Epoch 8/20\n",
"3/3 [==============================] - 49s 17s/step - loss: 0.0843 - val_loss: 0.0809\n",
"Epoch 9/20\n",
"3/3 [==============================] - 50s 18s/step - loss: 0.0841 - val_loss: 0.0828\n",
"Epoch 10/20\n",
"3/3 [==============================] - 47s 16s/step - loss: 0.0803 - val_loss: 0.0786\n",
"Epoch 11/20\n",
"3/3 [==============================] - 49s 17s/step - loss: 0.0806 - val_loss: 0.0787\n",
"Epoch 12/20\n",
"3/3 [==============================] - 48s 16s/step - loss: 0.0785 - val_loss: 0.0785\n",
"Epoch 13/20\n",
"3/3 [==============================] - 50s 17s/step - loss: 0.0792 - val_loss: 0.0775\n",
"Epoch 14/20\n",
"3/3 [==============================] - 61s 21s/step - loss: 0.0775 - val_loss: 0.0773\n",
"Epoch 15/20\n",
"3/3 [==============================] - 55s 20s/step - loss: 0.0782 - val_loss: 0.0770\n",
"Epoch 16/20\n",
"3/3 [==============================] - 52s 18s/step - loss: 0.0775 - val_loss: 0.0773\n",
"Epoch 17/20\n",
"3/3 [==============================] - 53s 19s/step - loss: 0.0778 - val_loss: 0.0767\n",
"Epoch 18/20\n",
"3/3 [==============================] - 50s 17s/step - loss: 0.0774 - val_loss: 0.0770\n",
"Epoch 19/20\n",
"3/3 [==============================] - 49s 17s/step - loss: 0.0775 - val_loss: 0.0766\n",
"Epoch 20/20\n",
"3/3 [==============================] - 48s 17s/step - loss: 0.0772 - val_loss: 0.0766\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Training vs. validation loss per epoch\n",
"fig, ax = plt.subplots(figsize=(14, 6), dpi=80)\n",
"for key, colour, label in (('loss', 'b', 'Train'), ('val_loss', 'r', 'Validation')):\n",
"    ax.plot(history[key], colour, label=label, linewidth=2)\n",
"ax.set_title('Model loss', fontsize=16)\n",
"ax.set(ylabel='Loss (mae)', xlabel='Epoch')\n",
"ax.legend(loc='upper right')\n",
"plt.show()"
],
"metadata": {
"id": "ofl13e1cyayt",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 448
},
"outputId": "2d839cf8-07a3-49f4-ceb1-3e120adea123"
},
"execution_count": 13,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 1120x480 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "HIgyeS7R71rP"
},
"execution_count": 13,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment