Text classification with Transformer-OnlyEvaluate.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Text classification with Transformer-OnlyEvaluate.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyNfznPnUteT1LWUVrbPJscc",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/furkanozbay/0efc000bc7c43bd414a51350f06058af/text-classification-with-transformer-onlyevaluate.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
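{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook trains the Keras \"Text classification with Transformer\" example on IMDB, then evaluates (without retraining) a compressed variant in which the 128-unit Dense head is replaced by an 8-unit one initialized from the level-4 Haar wavelet approximation coefficients of the trained layer.\n"
]
},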
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "z-Olpy91ZKAj",
"outputId": "a21fe61f-d20a-4f2b-f8b2-75786758176d"
},
"source": [ | |
"import tensorflow as tf\n", | |
"\n", | |
"import numpy as np\n", | |
"import random as python_random\n", | |
"# Seed value (can actually be different for each attribution step)\n", | |
"seed_value= 123\n", | |
"\n", | |
"# 1. Set `PYTHONHASHSEED` environment variable at a fixed value\n", | |
"import os\n", | |
"os.environ['PYTHONHASHSEED']=str(seed_value)\n", | |
"\n", | |
"# The below is necessary for starting Numpy generated random numbers\n", | |
"# in a well-defined initial state.\n", | |
"np.random.seed(seed_value)\n", | |
"\n", | |
"# The below is necessary for starting core Python generated random numbers\n", | |
"# in a well-defined state.\n", | |
"python_random.seed(seed_value)\n", | |
"\n", | |
"# The below set_seed() will make random number generation\n", | |
"# in the TensorFlow backend have a well-defined initial state.\n", | |
"# For further details, see:\n", | |
"# https://www.tensorflow.org/api_docs/python/tf/random/set_seed\n", | |
"tf.random.set_seed(seed_value)\n", | |
"\n", | |
"from tensorflow import keras\n", | |
"from tensorflow.keras import layers\n", | |
"\n", | |
"\n", | |
"class TransformerBlock(layers.Layer):\n", | |
" def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):\n", | |
" super(TransformerBlock, self).__init__()\n", | |
" self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)\n", | |
" self.ffn = keras.Sequential(\n", | |
" [layers.Dense(ff_dim, activation=\"relu\"), layers.Dense(embed_dim),]\n", | |
" )\n", | |
" self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)\n", | |
" self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)\n", | |
" self.dropout1 = layers.Dropout(rate)\n", | |
" self.dropout2 = layers.Dropout(rate)\n", | |
"\n", | |
" def call(self, inputs, training):\n", | |
" attn_output = self.att(inputs, inputs)\n", | |
" attn_output = self.dropout1(attn_output, training=training)\n", | |
" out1 = self.layernorm1(inputs + attn_output)\n", | |
" ffn_output = self.ffn(out1)\n", | |
" ffn_output = self.dropout2(ffn_output, training=training)\n", | |
" return self.layernorm2(out1 + ffn_output)\n", | |
"\n", | |
"class TokenAndPositionEmbedding(layers.Layer):\n", | |
" def __init__(self, maxlen, vocab_size, embed_dim):\n", | |
" super(TokenAndPositionEmbedding, self).__init__()\n", | |
" self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)\n", | |
" self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)\n", | |
"\n", | |
" def call(self, x):\n", | |
" maxlen = tf.shape(x)[-1]\n", | |
" positions = tf.range(start=0, limit=maxlen, delta=1)\n", | |
" positions = self.pos_emb(positions)\n", | |
" x = self.token_emb(x)\n", | |
" return x + positions\n", | |
"\n", | |
"\n", | |
"vocab_size = 20000 # Only consider the top 20k words\n", | |
"maxlen = 200 # Only consider the first 200 words of each movie review\n", | |
"(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=vocab_size)\n", | |
"print(len(x_train), \"Training sequences\")\n", | |
"print(len(x_val), \"Validation sequences\")\n", | |
"x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)\n", | |
"x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)\n" | |
],
"execution_count": 63,
"outputs": [
{
"output_type": "stream",
"text": [
"<string>:6: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
"/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/datasets/imdb.py:155: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
"  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"25000 Training sequences\n",
"25000 Validation sequences\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/datasets/imdb.py:156: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
"  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])\n"
],
"name": "stderr"
}
]
},
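{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal shape check for the two custom layers, assuming the cell above has been run; `sample`, `demo_emb`, and `demo_block` are illustrative names rather than part of the original training code.\n"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Token + position embeddings keep the (batch, sequence) shape and add an\n",
"# embed_dim axis; the transformer block preserves that shape.\n",
"sample = x_train[:2]  # two padded reviews, shape (2, 200)\n",
"demo_emb = TokenAndPositionEmbedding(maxlen, vocab_size, 32)\n",
"demo_block = TransformerBlock(32, 2, 32)\n",
"print(demo_emb(sample).shape)  # expected: (2, 200, 32)\n",
"print(demo_block(demo_emb(sample), training=False).shape)  # expected: (2, 200, 32)\n"
],
"execution_count": null,
"outputs": []
},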
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rRLtOoAhWo8O",
"outputId": "828f2278-11fd-4eb1-cf66-078c0855f38e"
},
"source": [
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n",
"from tensorflow.keras import backend as K\n",
"\n",
"# Drop any model left over from a previous run before rebuilding.\n",
"model = None\n",
"del model\n",
"\n",
"class TransformerBlock(layers.Layer):\n", | |
" def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):\n", | |
" super(TransformerBlock, self).__init__()\n", | |
" self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)\n", | |
" self.ffn = keras.Sequential(\n", | |
" [layers.Dense(ff_dim, activation=\"relu\"), layers.Dense(embed_dim),]\n", | |
" )\n", | |
" self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)\n", | |
" self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)\n", | |
" self.dropout1 = layers.Dropout(rate)\n", | |
" self.dropout2 = layers.Dropout(rate)\n", | |
"\n", | |
" def call(self, inputs, training):\n", | |
" attn_output = self.att(inputs, inputs)\n", | |
" attn_output = self.dropout1(attn_output, training=training)\n", | |
" out1 = self.layernorm1(inputs + attn_output)\n", | |
" ffn_output = self.ffn(out1)\n", | |
" ffn_output = self.dropout2(ffn_output, training=training)\n", | |
" return self.layernorm2(out1 + ffn_output)\n", | |
"\n", | |
"class TokenAndPositionEmbedding(layers.Layer):\n", | |
" def __init__(self, maxlen, vocab_size, embed_dim):\n", | |
" super(TokenAndPositionEmbedding, self).__init__()\n", | |
" self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)\n", | |
" self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)\n", | |
"\n", | |
" def call(self, x):\n", | |
" maxlen = tf.shape(x)[-1]\n", | |
" positions = tf.range(start=0, limit=maxlen, delta=1)\n", | |
" positions = self.pos_emb(positions)\n", | |
" x = self.token_emb(x)\n", | |
" return x + positions\n", | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"embed_dim = 32 # Embedding size for each token\n", | |
"num_heads = 2 # Number of attention heads\n", | |
"ff_dim = 32 # Hidden layer size in feed forward network inside transformer\n", | |
"\n", | |
"inputs = layers.Input(shape=(maxlen,))\n", | |
"embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)\n", | |
"x = embedding_layer(inputs)\n", | |
"transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)\n", | |
"x = transformer_block(x)\n", | |
"x = layers.GlobalAveragePooling1D()(x)\n", | |
"x = layers.Dropout(0.1)(x)\n", | |
"x = layers.Dense(128, activation=\"relu\")(x)\n", | |
"x = layers.Dropout(0.1)(x)\n", | |
"outputs = layers.Dense(2, activation=\"softmax\")(x)\n", | |
"\n", | |
"model = keras.Model(inputs=inputs, outputs=outputs)\n", | |
"\n", | |
"model.compile(\"adam\", \"sparse_categorical_crossentropy\", metrics=[\"accuracy\"])\n", | |
"history = model.fit(\n", | |
" x_train, y_train, batch_size=32, epochs=3, validation_data=(x_val, y_val), shuffle=False\n", | |
")" | |
], | |
"execution_count": 77, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Epoch 1/3\n", | |
"782/782 [==============================] - 130s 163ms/step - loss: 0.3721 - accuracy: 0.8212 - val_loss: 0.2946 - val_accuracy: 0.8746\n", | |
"Epoch 2/3\n", | |
"782/782 [==============================] - 129s 165ms/step - loss: 0.1945 - accuracy: 0.9255 - val_loss: 0.3535 - val_accuracy: 0.8586\n", | |
"Epoch 3/3\n", | |
"782/782 [==============================] - 129s 164ms/step - loss: 0.1405 - accuracy: 0.9475 - val_loss: 0.3696 - val_accuracy: 0.8612\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
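{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cells below index into `model.layers`; `model.summary()` makes that indexing visible, and index 5 should be the 128-unit `Dense` layer whose weights are decomposed next.\n"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Expected order: InputLayer (0), TokenAndPositionEmbedding (1),\n",
"# TransformerBlock (2), GlobalAveragePooling1D (3), Dropout (4),\n",
"# Dense-128 (5), Dropout (6), Dense-2 (7).\n",
"model.summary()\n"
],
"execution_count": null,
"outputs": []
},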
{
"cell_type": "code",
"metadata": {
"id": "R_izJh8QZXy-"
},
"source": [
"import pywt\n",
"\n",
"# Kernel and bias of the trained 128-unit Dense layer (model.layers[5]).\n",
"weights = model.layers[5].get_weights()[0]\n",
"biases = model.layers[5].get_weights()[1]\n",
"\n",
"# Single-level Haar DWT: approximation (cA) and detail (cD) coefficients.\n",
"cA, cD = pywt.dwt(weights, 'haar')\n",
"cbA, cbD = pywt.dwt(biases, 'haar')\n",
"\n",
"#print(pywt.wavelist())\n",
"\n",
"# Four-level decomposition halves the last axis four times: 128 -> 8.\n",
"# The first element is the level-4 approximation; the rest are details.\n",
"coeffs = pywt.wavedec(weights, 'haar', level=4)\n",
"cA2, cD4, cD3, cD2, cD1 = coeffs\n",
"\n",
"coeffs2 = pywt.wavedec(biases, 'haar', level=4)\n",
"cbA2, cbD4, cbD3, cbD2, cbD1 = coeffs2\n",
"\n",
"\n",
"def doWavelet(arr):\n",
"    cA, cD = pywt.dwt(arr, 'haar')\n",
"    return cA, cD\n"
],
"execution_count": 78,
"outputs": []
},
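{
"cell_type": "markdown",
"metadata": {},
"source": [
"Why `level=4`: each Haar DWT level halves the last axis, so four levels map the (32, 128) kernel to a (32, 8) approximation, exactly the kernel shape of the 8-unit `Dense` layer built below. A small sketch (`demo` and `approx` are illustrative names) confirms the arithmetic.\n"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"import numpy as np\n",
"import pywt\n",
"\n",
"demo = np.random.randn(32, 128)  # same shape as the trained Dense-128 kernel\n",
"approx = pywt.wavedec(demo, 'haar', level=4)[0]  # level-4 approximation\n",
"print(approx.shape)  # (32, 8): 128 / 2**4 = 8 along the last axis\n"
],
"execution_count": null,
"outputs": []
},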
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PQ6UBSp7ZEKz",
"outputId": "54b7e181-01ef-445c-9c14-44e254477446"
},
"source": [
"from tensorflow.keras import backend as K\n",
"\n",
"# Clear Keras's global graph state before building the smaller model.\n",
"K.clear_session()\n",
"\n",
"# Drop leftovers from previous runs.\n",
"model2 = None\n",
"x = None\n",
"transformer_block = None\n",
"outputs = None\n",
"del model2\n",
"del x\n",
"del transformer_block\n",
"del outputs\n",
"\n",
"\n",
"# Same architecture as before, but with an 8-unit Dense layer in place\n",
"# of the 128-unit one.\n",
"inputs = layers.Input(shape=(maxlen,))\n",
"embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)\n",
"x = embedding_layer(inputs)\n",
"transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)\n",
"x = transformer_block(x)\n",
"x = layers.GlobalAveragePooling1D()(x)\n",
"x = layers.Dropout(0.1)(x)\n",
"x = layers.Dense(8, activation=\"relu\")(x)\n",
"x = layers.Dropout(0.5)(x)\n",
"outputs = layers.Dense(2, activation=\"softmax\")(x)\n",
"\n",
"model2 = keras.Model(inputs=inputs, outputs=outputs)\n",
"\n",
"# Copy the trained weights for every layer except index 5: the 8-unit\n",
"# Dense layer keeps its random initialization as a baseline.\n",
"model2.layers[0].set_weights(model.layers[0].get_weights())\n",
"model2.layers[1].set_weights(model.layers[1].get_weights())\n",
"model2.layers[2].set_weights(model.layers[2].get_weights())\n",
"model2.layers[3].set_weights(model.layers[3].get_weights())\n",
"model2.layers[4].set_weights(model.layers[4].get_weights())\n",
"model2.layers[6].set_weights(model.layers[6].get_weights())\n",
"\n",
"model2.compile(\"adam\", \"sparse_categorical_crossentropy\", metrics=[\"accuracy\"])\n",
"#history = model2.fit(\n",
"#    x_train, y_train, batch_size=256, epochs=1, validation_data=(x_val, y_val), shuffle=False\n",
"#)\n",
"\n",
"model2.evaluate(x_val, y_val)"
],
"execution_count": 85,
"outputs": [
{
"output_type": "stream",
"text": [
"782/782 [==============================] - 38s 48ms/step - loss: 0.7281 - accuracy: 0.3524\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[0.7280932068824768, 0.3523600101470947]"
]
},
"metadata": {
"tags": []
},
"execution_count": 85
}
]
},
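{
"cell_type": "markdown",
"metadata": {},
"source": [
"`model3` repeats the experiment, but this time the 8-unit `Dense` layer is filled with the level-4 Haar approximation coefficients (`cA2`, `cbA2`) of the trained 128-unit layer instead of being left at its random initialization.\n"
]
},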
{
"cell_type": "code",
"metadata": {
"id": "i65MSy0-mMau",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4c4c6076-cb8c-4e8a-97d1-f13dd040522b"
},
"source": [
"# Drop leftovers from previous runs.\n",
"model3 = None\n",
"x = None\n",
"transformer_block = None\n",
"outputs = None\n",
"del model3\n",
"del x\n",
"del transformer_block\n",
"del outputs\n",
"\n",
"\n",
"inputs = layers.Input(shape=(maxlen,))\n",
"embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)\n",
"x = embedding_layer(inputs)\n",
"transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)\n",
"x = transformer_block(x)\n",
"x = layers.GlobalAveragePooling1D()(x)\n",
"x = layers.Dropout(0.1)(x)\n",
"x = layers.Dense(8, activation=\"relu\")(x)\n",
"x = layers.Dropout(0.5)(x)\n",
"outputs = layers.Dense(2, activation=\"softmax\")(x)\n",
"\n",
"model3 = keras.Model(inputs=inputs, outputs=outputs)\n",
"\n",
"model3.layers[0].set_weights(model.layers[0].get_weights())\n",
"model3.layers[1].set_weights(model.layers[1].get_weights())\n",
"model3.layers[2].set_weights(model.layers[2].get_weights())\n",
"model3.layers[3].set_weights(model.layers[3].get_weights())\n",
"model3.layers[4].set_weights(model.layers[4].get_weights())\n",
"model3.layers[6].set_weights(model.layers[6].get_weights())\n",
"\n",
"# Load the wavelet approximation coefficients into the 8-unit Dense layer:\n",
"# cA2 has shape (32, 8) for the kernel, cbA2 has shape (8,) for the bias.\n",
"K.set_value(model3.layers[5].weights[0], cA2)\n",
"K.set_value(model3.layers[5].weights[1], cbA2)\n",
"\n",
"model3.compile(\"adam\", \"sparse_categorical_crossentropy\", metrics=[\"accuracy\"])\n",
"#history = model3.fit(\n",
"#    x_train, y_train, batch_size=256, epochs=1, validation_data=(x_val, y_val), shuffle=False\n",
"#)\n",
"\n",
"model3.evaluate(x_val, y_val)\n",
"del model3"
],
"execution_count": 84,
"outputs": [
{
"output_type": "stream",
"text": [
"782/782 [==============================] - 38s 48ms/step - loss: 0.6664 - accuracy: 0.8208\n"
],
"name": "stdout"
}
]
},
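{
"cell_type": "markdown",
"metadata": {},
"source": [
"Summary of the recorded runs, all without retraining the compressed models: the original model reaches 0.8612 validation accuracy; the compressed model with a randomly initialized 8-unit layer reaches 0.3524; the compressed model initialized from the wavelet approximation coefficients reaches 0.8208.\n"
]
}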
]
}