Text classification with Transformer-OnlyEvaluate.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Text classification with Transformer-OnlyEvaluate.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyNfznPnUteT1LWUVrbPJscc",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/furkanozbay/0efc000bc7c43bd414a51350f06058af/text-classification-with-transformer-onlyevaluate.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "z-Olpy91ZKAj",
"outputId": "a21fe61f-d20a-4f2b-f8b2-75786758176d"
},
"source": [
"import tensorflow as tf\n",
"\n",
"import numpy as np\n",
"import random as python_random\n",
"# Seed value (can actually be different for each attribution step)\n",
"seed_value= 123\n",
"\n",
"# 1. Set `PYTHONHASHSEED` environment variable at a fixed value\n",
"import os\n",
"os.environ['PYTHONHASHSEED']=str(seed_value)\n",
"\n",
"# The below is necessary for starting Numpy generated random numbers\n",
"# in a well-defined initial state.\n",
"np.random.seed(seed_value)\n",
"\n",
"# The below is necessary for starting core Python generated random numbers\n",
"# in a well-defined state.\n",
"python_random.seed(seed_value)\n",
"\n",
"# The below set_seed() will make random number generation\n",
"# in the TensorFlow backend have a well-defined initial state.\n",
"# For further details, see:\n",
"# https://www.tensorflow.org/api_docs/python/tf/random/set_seed\n",
"tf.random.set_seed(seed_value)\n",
"\n",
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n",
"\n",
"\n",
"class TransformerBlock(layers.Layer):\n",
" def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):\n",
" super(TransformerBlock, self).__init__()\n",
" self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)\n",
" self.ffn = keras.Sequential(\n",
" [layers.Dense(ff_dim, activation=\"relu\"), layers.Dense(embed_dim),]\n",
" )\n",
" self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)\n",
" self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)\n",
" self.dropout1 = layers.Dropout(rate)\n",
" self.dropout2 = layers.Dropout(rate)\n",
"\n",
" def call(self, inputs, training):\n",
" attn_output = self.att(inputs, inputs)\n",
" attn_output = self.dropout1(attn_output, training=training)\n",
" out1 = self.layernorm1(inputs + attn_output)\n",
" ffn_output = self.ffn(out1)\n",
" ffn_output = self.dropout2(ffn_output, training=training)\n",
" return self.layernorm2(out1 + ffn_output)\n",
"\n",
"class TokenAndPositionEmbedding(layers.Layer):\n",
" def __init__(self, maxlen, vocab_size, embed_dim):\n",
" super(TokenAndPositionEmbedding, self).__init__()\n",
" self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)\n",
" self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)\n",
"\n",
" def call(self, x):\n",
" maxlen = tf.shape(x)[-1]\n",
" positions = tf.range(start=0, limit=maxlen, delta=1)\n",
" positions = self.pos_emb(positions)\n",
" x = self.token_emb(x)\n",
" return x + positions\n",
"\n",
"\n",
"vocab_size = 20000 # Only consider the top 20k words\n",
"maxlen = 200 # Only consider the first 200 words of each movie review\n",
"(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=vocab_size)\n",
"print(len(x_train), \"Training sequences\")\n",
"print(len(x_val), \"Validation sequences\")\n",
"x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)\n",
"x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)\n"
],
"execution_count": 63,
"outputs": [
{
"output_type": "stream",
"text": [
"<string>:6: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
"/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/datasets/imdb.py:155: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
" x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"25000 Training sequences\n",
"25000 Validation sequences\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/datasets/imdb.py:156: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n",
" x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rRLtOoAhWo8O",
"outputId": "828f2278-11fd-4eb1-cf66-078c0855f38e"
},
"source": [
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n",
"from keras import backend as K\n",
"\n",
"model = None\n",
"del model\n",
"\n",
"class TransformerBlock(layers.Layer):\n",
" def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):\n",
" super(TransformerBlock, self).__init__()\n",
" self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)\n",
" self.ffn = keras.Sequential(\n",
" [layers.Dense(ff_dim, activation=\"relu\"), layers.Dense(embed_dim),]\n",
" )\n",
" self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)\n",
" self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)\n",
" self.dropout1 = layers.Dropout(rate)\n",
" self.dropout2 = layers.Dropout(rate)\n",
"\n",
" def call(self, inputs, training):\n",
" attn_output = self.att(inputs, inputs)\n",
" attn_output = self.dropout1(attn_output, training=training)\n",
" out1 = self.layernorm1(inputs + attn_output)\n",
" ffn_output = self.ffn(out1)\n",
" ffn_output = self.dropout2(ffn_output, training=training)\n",
" return self.layernorm2(out1 + ffn_output)\n",
"\n",
"class TokenAndPositionEmbedding(layers.Layer):\n",
" def __init__(self, maxlen, vocab_size, embed_dim):\n",
" super(TokenAndPositionEmbedding, self).__init__()\n",
" self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)\n",
" self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)\n",
"\n",
" def call(self, x):\n",
" maxlen = tf.shape(x)[-1]\n",
" positions = tf.range(start=0, limit=maxlen, delta=1)\n",
" positions = self.pos_emb(positions)\n",
" x = self.token_emb(x)\n",
" return x + positions\n",
"\n",
"\n",
"\n",
"\n",
"embed_dim = 32 # Embedding size for each token\n",
"num_heads = 2 # Number of attention heads\n",
"ff_dim = 32 # Hidden layer size in feed forward network inside transformer\n",
"\n",
"inputs = layers.Input(shape=(maxlen,))\n",
"embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)\n",
"x = embedding_layer(inputs)\n",
"transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)\n",
"x = transformer_block(x)\n",
"x = layers.GlobalAveragePooling1D()(x)\n",
"x = layers.Dropout(0.1)(x)\n",
"x = layers.Dense(128, activation=\"relu\")(x)\n",
"x = layers.Dropout(0.1)(x)\n",
"outputs = layers.Dense(2, activation=\"softmax\")(x)\n",
"\n",
"model = keras.Model(inputs=inputs, outputs=outputs)\n",
"\n",
"model.compile(\"adam\", \"sparse_categorical_crossentropy\", metrics=[\"accuracy\"])\n",
"history = model.fit(\n",
" x_train, y_train, batch_size=32, epochs=3, validation_data=(x_val, y_val), shuffle=False\n",
")"
],
"execution_count": 77,
"outputs": [
{
"output_type": "stream",
"text": [
"Epoch 1/3\n",
"782/782 [==============================] - 130s 163ms/step - loss: 0.3721 - accuracy: 0.8212 - val_loss: 0.2946 - val_accuracy: 0.8746\n",
"Epoch 2/3\n",
"782/782 [==============================] - 129s 165ms/step - loss: 0.1945 - accuracy: 0.9255 - val_loss: 0.3535 - val_accuracy: 0.8586\n",
"Epoch 3/3\n",
"782/782 [==============================] - 129s 164ms/step - loss: 0.1405 - accuracy: 0.9475 - val_loss: 0.3696 - val_accuracy: 0.8612\n"
],
"name": "stdout"
}
]
},
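{
"cell_type": "markdown",
"metadata": {},
"source": [
"The next cell takes the trained `Dense(128)` head's kernel and bias and computes Haar wavelet decompositions with `pywt`. The idea is to keep only the level-4 approximation coefficients, shrinking the head's 128 units down to 8 so they can later be loaded into a smaller model for evaluation."
]
},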
{
"cell_type": "code",
"metadata": {
"id": "R_izJh8QZXy-"
},
"source": [
"import pywt\n",
"weights = model.layers[5].get_weights()[0]\n",
"biases = model.layers[5].get_weights()[1]\n",
"\n",
"cA, cD = pywt.dwt(weights, 'haar')\n",
"cbA, cbD = pywt.dwt(biases, 'haar')\n",
"\n",
"\n",
"#print(pywt.wavelist())\n",
"\n",
"coeffs = pywt.wavedec(weights, 'haar', level = 4)\n",
"cA2, cD4, cD3, cD2, cD1 = coeffs\n",
"\n",
"\n",
"coeffs2 = pywt.wavedec(biases, 'haar', level = 4)\n",
"cbA2, cbD4, cbD3, cbD2, cbD1 = coeffs2\n",
"\n",
"\n",
"def doWavelet(arr):\n",
" cA, cD = pywt.dwt(arr, 'haar')"
],
"execution_count": 78,
"outputs": []
},
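{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sanity check (a sketch, assuming the `cA4`/`cbA4` names from the cell above): the level-4 approximation coefficients should already match the variable shapes of the `Dense(8)` layer built below."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Sketch: verify the compressed coefficients fit a Dense(8) layer's variables\n",
"print(\"kernel:\", weights.shape, \"->\", cA4.shape)  # expect (32, 128) -> (32, 8)\n",
"print(\"bias:  \", biases.shape, \"->\", cbA4.shape)  # expect (128,) -> (8,)\n",
"assert cA4.shape == (weights.shape[0], 8)\n",
"assert cbA4.shape == (8,)"
],
"execution_count": null,
"outputs": []
},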
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PQ6UBSp7ZEKz",
"outputId": "54b7e181-01ef-445c-9c14-44e254477446"
},
"source": [
"from keras import backend as K \n",
"\n",
"# Do some code, e.g. train and save model\n",
"\n",
"K.clear_session()\n",
"\n",
"model2 = None\n",
"x = None\n",
"transformer_block = None\n",
"outputs = None\n",
"del model2\n",
"del x\n",
"del transformer_block\n",
"del outputs\n",
"\n",
"\n",
"inputs = layers.Input(shape=(maxlen,))\n",
"embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)\n",
"x = embedding_layer(inputs)\n",
"transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)\n",
"x = transformer_block(x)\n",
"x = layers.GlobalAveragePooling1D()(x)\n",
"x = layers.Dropout(0.1)(x)\n",
"x = layers.Dense(8, activation=\"relu\")(x)\n",
"x = layers.Dropout(0.5)(x)\n",
"outputs = layers.Dense(2, activation=\"softmax\")(x)\n",
"\n",
"model2 = keras.Model(inputs=inputs, outputs=outputs)\n",
"\n",
"\n",
"model2.layers[0].set_weights( model.layers[0].get_weights())\n",
"model2.layers[1].set_weights( model.layers[1].get_weights())\n",
"model2.layers[2].set_weights( model.layers[2].get_weights())\n",
"model2.layers[3].set_weights( model.layers[3].get_weights())\n",
"model2.layers[4].set_weights( model.layers[4].get_weights())\n",
"model2.layers[6].set_weights( model.layers[6].get_weights())\n",
"\n",
"\n",
"model2.compile(\"adam\", \"sparse_categorical_crossentropy\", metrics=[\"accuracy\"])\n",
"#history = model2.fit(\n",
"# x_train, y_train, batch_size=256, epochs=1, validation_data=(x_val, y_val), shuffle=False\n",
"#)\n",
"\n",
"model2.evaluate(x_val, y_val)"
],
"execution_count": 85,
"outputs": [
{
"output_type": "stream",
"text": [
"782/782 [==============================] - 38s 48ms/step - loss: 0.7281 - accuracy: 0.3524\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[0.7280932068824768, 0.3523600101470947]"
]
},
"metadata": {
"tags": []
},
"execution_count": 85
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "i65MSy0-mMau",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4c4c6076-cb8c-4e8a-97d1-f13dd040522b"
},
"source": [
"model3 = None\n",
"x = None\n",
"transformer_block = None\n",
"outputs = None\n",
"\n",
"del model3\n",
"del x\n",
"del transformer_block\n",
"del outputs\n",
"\n",
"\n",
"inputs = layers.Input(shape=(maxlen,))\n",
"embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)\n",
"x = embedding_layer(inputs)\n",
"transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)\n",
"x = transformer_block(x)\n",
"x = layers.GlobalAveragePooling1D()(x)\n",
"x = layers.Dropout(0.1)(x)\n",
"x = layers.Dense(8, activation=\"relu\")(x)\n",
"x = layers.Dropout(0.5)(x)\n",
"outputs = layers.Dense(2, activation=\"softmax\")(x)\n",
"\n",
"model3 = keras.Model(inputs=inputs, outputs=outputs)\n",
"\n",
"\n",
"model3.layers[0].set_weights( model.layers[0].get_weights())\n",
"model3.layers[1].set_weights( model.layers[1].get_weights())\n",
"model3.layers[2].set_weights( model.layers[2].get_weights())\n",
"model3.layers[3].set_weights( model.layers[3].get_weights())\n",
"model3.layers[4].set_weights( model.layers[4].get_weights())\n",
"model3.layers[6].set_weights( model.layers[6].get_weights())\n",
"\n",
"K.set_value(model3.layers[5].weights[0], cA2)\n",
"K.set_value(model3.layers[5].weights[1], cbA2)\n",
"\n",
"model3.compile(\"adam\", \"sparse_categorical_crossentropy\", metrics=[\"accuracy\"])\n",
"#history = model3.fit(\n",
"# x_train, y_train, batch_size=256, epochs=1, validation_data=(x_val, y_val), shuffle=False\n",
"#)\n",
"\n",
"model3.evaluate(x_val, y_val)\n",
"del model3"
],
"execution_count": 84,
"outputs": [
{
"output_type": "stream",
"text": [
"782/782 [==============================] - 38s 48ms/step - loss: 0.6664 - accuracy: 0.8208\n"
],
"name": "stdout"
}
]
}
]
}