Skip to content

Instantly share code, notes, and snippets.

@aizvorski
Created August 5, 2020 00:24
Show Gist options
  • Save aizvorski/562211fe3656b2fb7c6e1adeb90834b8 to your computer and use it in GitHub Desktop.
Save aizvorski/562211fe3656b2fb7c6e1adeb90834b8 to your computer and use it in GitHub Desktop.
Taylor_Expansion_Keras_CIFAR10.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Taylor_Expansion_Keras_CIFAR10.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "dDv96h2Lbn2p",
"colab_type": "code",
"colab": {}
},
"source": [
"%matplotlib inline\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import tensorflow as tf\n",
"from tensorflow.keras.models import Sequential\n",
"\n",
"from tensorflow.keras.layers import Dense, Dropout, Activation, Conv2D\n",
"from tensorflow.keras.utils import to_categorical"
],
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "zS7O34h0coNC",
"colab_type": "code",
"colab": {}
},
"source": [
"from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",
"from tensorflow.keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D, Flatten\n",
"from tensorflow.keras.layers import BatchNormalization"
],
"execution_count": 7,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "ArkmCvoo_c1f",
"colab_type": "text"
},
"source": [
"# Get CIFAR10 dataset"
]
},
{
"cell_type": "code",
"metadata": {
"id": "FKGtzWygiqRe",
"colab_type": "code",
"colab": {}
},
"source": [
"from tensorflow.keras.datasets import cifar10\n",
"(X_train, y_train), (X_test, y_test) = cifar10.load_data()\n",
"X_train = X_train.astype('float32') / 255.0\n",
"X_test = X_test.astype('float32') / 255.0\n",
"nb_classes = 10\n",
"Y_train = to_categorical(y_train, nb_classes)\n",
"Y_test = to_categorical(y_test, nb_classes)"
],
"execution_count": 9,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "MkRIXjEo_jAR",
"colab_type": "text"
},
"source": [
"# Define simple convolutional model - optionally with Taylor expansion of the inputs"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ws0CS_-ngeyw",
"colab_type": "code",
"colab": {}
},
"source": [
"def get_model(use_taylor_expansion=True):\n",
" inputs = tf.keras.Input(shape=(32, 32, 3))\n",
"\n",
" x = inputs\n",
"\n",
" if use_taylor_expansion:\n",
" n_terms = 2\n",
" c = tf.constant([1, -1/6])\n",
" p = tf.constant([1, 3], dtype=tf.float32)\n",
"\n",
" terms = []\n",
" for i in range(n_terms):\n",
" m = c[i] * tf.math.pow(x, p[i])\n",
" terms.append(m)\n",
" expansion = tf.math.cumsum(terms)\n",
" expansion_terms_last = tf.transpose(expansion, perm=[1, 2, 3, 4, 0])\n",
" x = tf.reshape(expansion_terms_last, tf.constant([-1, 32, 32, 3*2])) \n",
"\n",
" x = Conv2D(32, (3, 3), input_shape=(32,32,3*2))(x)\n",
" else:\n",
" x = Conv2D(32, (3, 3), input_shape=(32,32,3))(x)\n",
" \n",
" x = BatchNormalization(axis=-1)(x)\n",
" x = Activation('relu')(x)\n",
"\n",
" x = Conv2D(32, (3, 3))(x)\n",
" x = BatchNormalization(axis=-1)(x)\n",
" x = Activation('relu')(x)\n",
" x = MaxPooling2D(pool_size=(2,2))(x)\n",
"\n",
" x = Conv2D(64,(3, 3))(x)\n",
" x = BatchNormalization(axis=-1)(x)\n",
" x = Activation('relu')(x)\n",
"\n",
" x = Conv2D(64, (3, 3))(x)\n",
" x = BatchNormalization(axis=-1)(x)\n",
" x = Activation('relu')(x)\n",
" x = MaxPooling2D(pool_size=(2,2))(x)\n",
" x = Flatten()(x)\n",
"\n",
" x = Dense(512)(x)\n",
" x = BatchNormalization()(x)\n",
" x = Activation('relu')(x)\n",
"\n",
" x = Dropout(0.2)(x)\n",
" x = Dense(10)(x)\n",
" x = Activation('softmax')(x)\n",
" outputs = x\n",
"\n",
" model = tf.keras.Model(inputs=inputs, outputs=outputs, name=\"cifar10_model\")\n",
" return model"
],
"execution_count": 10,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "CGVfyl0g_WhV",
"colab_type": "text"
},
"source": [
"# Train model with expansion"
]
},
{
"cell_type": "code",
"metadata": {
"id": "5Aj5s3-TdAG0",
"colab_type": "code",
"colab": {}
},
"source": [
"model = get_model(use_taylor_expansion=True)\n",
"model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])"
],
"execution_count": 12,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "wetyc92nii20",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "edcd76d7-7898-4f7b-8d51-6cb8d8ccae52"
},
"source": [
"model.summary()"
],
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"text": [
"Model: \"cifar10_model\"\n",
"__________________________________________________________________________________________________\n",
"Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
"input_4 (InputLayer) [(None, 32, 32, 3)] 0 \n",
"__________________________________________________________________________________________________\n",
"tf_op_layer_Pow_6 (TensorFlowOp [(None, 32, 32, 3)] 0 input_4[0][0] \n",
"__________________________________________________________________________________________________\n",
"tf_op_layer_Pow_7 (TensorFlowOp [(None, 32, 32, 3)] 0 input_4[0][0] \n",
"__________________________________________________________________________________________________\n",
"tf_op_layer_Mul_6 (TensorFlowOp [(None, 32, 32, 3)] 0 tf_op_layer_Pow_6[0][0] \n",
"__________________________________________________________________________________________________\n",
"tf_op_layer_Mul_7 (TensorFlowOp [(None, 32, 32, 3)] 0 tf_op_layer_Pow_7[0][0] \n",
"__________________________________________________________________________________________________\n",
"tf_op_layer_x_3 (TensorFlowOpLa [(2, None, 32, 32, 3 0 tf_op_layer_Mul_6[0][0] \n",
" tf_op_layer_Mul_7[0][0] \n",
"__________________________________________________________________________________________________\n",
"tf_op_layer_Cumsum_3 (TensorFlo [(2, None, 32, 32, 3 0 tf_op_layer_x_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"tf_op_layer_Transpose_3 (Tensor [(None, 32, 32, 3, 2 0 tf_op_layer_Cumsum_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"tf_op_layer_Reshape_3 (TensorFl [(None, 32, 32, 6)] 0 tf_op_layer_Transpose_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_5 (Conv2D) (None, 30, 30, 32) 1760 tf_op_layer_Reshape_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_5 (BatchNor (None, 30, 30, 32) 128 conv2d_5[0][0] \n",
"__________________________________________________________________________________________________\n",
"activation_6 (Activation) (None, 30, 30, 32) 0 batch_normalization_5[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_6 (Conv2D) (None, 28, 28, 32) 9248 activation_6[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_6 (BatchNor (None, 28, 28, 32) 128 conv2d_6[0][0] \n",
"__________________________________________________________________________________________________\n",
"activation_7 (Activation) (None, 28, 28, 32) 0 batch_normalization_6[0][0] \n",
"__________________________________________________________________________________________________\n",
"max_pooling2d_2 (MaxPooling2D) (None, 14, 14, 32) 0 activation_7[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_7 (Conv2D) (None, 12, 12, 64) 18496 max_pooling2d_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_7 (BatchNor (None, 12, 12, 64) 256 conv2d_7[0][0] \n",
"__________________________________________________________________________________________________\n",
"activation_8 (Activation) (None, 12, 12, 64) 0 batch_normalization_7[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_8 (Conv2D) (None, 10, 10, 64) 36928 activation_8[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_8 (BatchNor (None, 10, 10, 64) 256 conv2d_8[0][0] \n",
"__________________________________________________________________________________________________\n",
"activation_9 (Activation) (None, 10, 10, 64) 0 batch_normalization_8[0][0] \n",
"__________________________________________________________________________________________________\n",
"max_pooling2d_3 (MaxPooling2D) (None, 5, 5, 64) 0 activation_9[0][0] \n",
"__________________________________________________________________________________________________\n",
"flatten_1 (Flatten) (None, 1600) 0 max_pooling2d_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"dense_2 (Dense) (None, 512) 819712 flatten_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_9 (BatchNor (None, 512) 2048 dense_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"activation_10 (Activation) (None, 512) 0 batch_normalization_9[0][0] \n",
"__________________________________________________________________________________________________\n",
"dropout_1 (Dropout) (None, 512) 0 activation_10[0][0] \n",
"__________________________________________________________________________________________________\n",
"dense_3 (Dense) (None, 10) 5130 dropout_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"activation_11 (Activation) (None, 10) 0 dense_3[0][0] \n",
"==================================================================================================\n",
"Total params: 894,090\n",
"Trainable params: 892,682\n",
"Non-trainable params: 1,408\n",
"__________________________________________________________________________________________________\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "4s5wrxpLdFfa",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 315
},
"outputId": "4ec8ab8a-86cc-4a4f-a431-0020987a022e"
},
"source": [
"gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,\n",
" height_shift_range=0.08, zoom_range=0.08)\n",
"\n",
"test_gen = ImageDataGenerator()\n",
"\n",
"train_generator = gen.flow(X_train, Y_train, batch_size=128)\n",
"test_generator = test_gen.flow(X_test, Y_test, batch_size=128)\n",
"\n",
"model.fit_generator(train_generator, steps_per_epoch=50000//128, epochs=5, verbose=1, \n",
" validation_data=test_generator, validation_steps=10000//128)\n",
"\n",
"score = model.evaluate(X_test, Y_test)\n",
"print('Test score:', score[0])\n",
"print('Test accuracy:', score[1])"
],
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"text": [
"WARNING:tensorflow:From <ipython-input-14-5ae0d48ee43d>:10: Model.fit_generator (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use Model.fit, which supports generators.\n",
"Epoch 1/5\n",
"390/390 [==============================] - 24s 62ms/step - loss: 1.3718 - accuracy: 0.5113 - val_loss: 1.4123 - val_accuracy: 0.4833\n",
"Epoch 2/5\n",
"390/390 [==============================] - 25s 63ms/step - loss: 0.9821 - accuracy: 0.6525 - val_loss: 1.0716 - val_accuracy: 0.6417\n",
"Epoch 3/5\n",
"390/390 [==============================] - 24s 62ms/step - loss: 0.8225 - accuracy: 0.7103 - val_loss: 0.9748 - val_accuracy: 0.6632\n",
"Epoch 4/5\n",
"390/390 [==============================] - 24s 62ms/step - loss: 0.7378 - accuracy: 0.7405 - val_loss: 0.8508 - val_accuracy: 0.7212\n",
"Epoch 5/5\n",
"390/390 [==============================] - 25s 63ms/step - loss: 0.6842 - accuracy: 0.7598 - val_loss: 0.8672 - val_accuracy: 0.7072\n",
"313/313 [==============================] - 1s 3ms/step - loss: 0.8673 - accuracy: 0.7073\n",
"Test score: 0.8673319220542908\n",
"Test accuracy: 0.7073000073432922\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ELoZGval_MsN",
"colab_type": "text"
},
"source": [
"# Train model without expansion"
]
},
{
"cell_type": "code",
"metadata": {
"id": "IBBclS0y2_DV",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 243
},
"outputId": "097a099f-c280-4a17-b613-7b39588b500f"
},
"source": [
"model = get_model(use_taylor_expansion=False)\n",
"model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
"\n",
"gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,\n",
" height_shift_range=0.08, zoom_range=0.08)\n",
"\n",
"test_gen = ImageDataGenerator()\n",
"\n",
"train_generator = gen.flow(X_train, Y_train, batch_size=128)\n",
"test_generator = test_gen.flow(X_test, Y_test, batch_size=128)\n",
"\n",
"model.fit_generator(train_generator, steps_per_epoch=50000//128, epochs=5, verbose=1, \n",
" validation_data=test_generator, validation_steps=10000//128)\n",
"\n",
"score = model.evaluate(X_test, Y_test)\n",
"print('Test score:', score[0])\n",
"print('Test accuracy:', score[1])"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"text": [
"Epoch 1/5\n",
"390/390 [==============================] - 24s 61ms/step - loss: 1.3601 - accuracy: 0.5172 - val_loss: 1.4695 - val_accuracy: 0.4922\n",
"Epoch 2/5\n",
"390/390 [==============================] - 24s 61ms/step - loss: 0.9689 - accuracy: 0.6570 - val_loss: 0.9249 - val_accuracy: 0.6747\n",
"Epoch 3/5\n",
"390/390 [==============================] - 24s 61ms/step - loss: 0.8296 - accuracy: 0.7082 - val_loss: 0.8755 - val_accuracy: 0.6980\n",
"Epoch 4/5\n",
"390/390 [==============================] - 24s 62ms/step - loss: 0.7452 - accuracy: 0.7369 - val_loss: 0.9241 - val_accuracy: 0.6824\n",
"Epoch 5/5\n",
"390/390 [==============================] - 24s 62ms/step - loss: 0.6882 - accuracy: 0.7590 - val_loss: 0.8379 - val_accuracy: 0.7107\n",
"313/313 [==============================] - 1s 3ms/step - loss: 0.8380 - accuracy: 0.7108\n",
"Test score: 0.83797287940979\n",
"Test accuracy: 0.7107999920845032\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "KZ8R2n9-89D5",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 920
},
"outputId": "63d159a9-b967-42a4-943b-0c4d4b63089e"
},
"source": [
"model.summary()"
],
"execution_count": 17,
"outputs": [
{
"output_type": "stream",
"text": [
"Model: \"cifar10_model\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"input_6 (InputLayer) [(None, 32, 32, 3)] 0 \n",
"_________________________________________________________________\n",
"conv2d_13 (Conv2D) (None, 30, 30, 32) 896 \n",
"_________________________________________________________________\n",
"batch_normalization_15 (Batc (None, 30, 30, 32) 128 \n",
"_________________________________________________________________\n",
"activation_18 (Activation) (None, 30, 30, 32) 0 \n",
"_________________________________________________________________\n",
"conv2d_14 (Conv2D) (None, 28, 28, 32) 9248 \n",
"_________________________________________________________________\n",
"batch_normalization_16 (Batc (None, 28, 28, 32) 128 \n",
"_________________________________________________________________\n",
"activation_19 (Activation) (None, 28, 28, 32) 0 \n",
"_________________________________________________________________\n",
"max_pooling2d_6 (MaxPooling2 (None, 14, 14, 32) 0 \n",
"_________________________________________________________________\n",
"conv2d_15 (Conv2D) (None, 12, 12, 64) 18496 \n",
"_________________________________________________________________\n",
"batch_normalization_17 (Batc (None, 12, 12, 64) 256 \n",
"_________________________________________________________________\n",
"activation_20 (Activation) (None, 12, 12, 64) 0 \n",
"_________________________________________________________________\n",
"conv2d_16 (Conv2D) (None, 10, 10, 64) 36928 \n",
"_________________________________________________________________\n",
"batch_normalization_18 (Batc (None, 10, 10, 64) 256 \n",
"_________________________________________________________________\n",
"activation_21 (Activation) (None, 10, 10, 64) 0 \n",
"_________________________________________________________________\n",
"max_pooling2d_7 (MaxPooling2 (None, 5, 5, 64) 0 \n",
"_________________________________________________________________\n",
"flatten_3 (Flatten) (None, 1600) 0 \n",
"_________________________________________________________________\n",
"dense_6 (Dense) (None, 512) 819712 \n",
"_________________________________________________________________\n",
"batch_normalization_19 (Batc (None, 512) 2048 \n",
"_________________________________________________________________\n",
"activation_22 (Activation) (None, 512) 0 \n",
"_________________________________________________________________\n",
"dropout_3 (Dropout) (None, 512) 0 \n",
"_________________________________________________________________\n",
"dense_7 (Dense) (None, 10) 5130 \n",
"_________________________________________________________________\n",
"activation_23 (Activation) (None, 10) 0 \n",
"=================================================================\n",
"Total params: 893,226\n",
"Trainable params: 891,818\n",
"Non-trainable params: 1,408\n",
"_________________________________________________________________\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "dhyhX3WX-PJJ",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}
@jshayiding
Copy link

Hi Alex, I am trying to make a building block for the Taylor expansion so that I can use it anywhere after a Conv2D layer, but my attempt is not very satisfying. Could you share any thoughts on that? If we have multiple feature maps after applying the Taylor series of sin(x), how can we stack them together? Thanks a lot!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment