kumarjitpathakbangalore/Author_classification_using_CNN_on_Text_an_example_of_INCREMENTAL_LEARNING.ipynb

## Author_classification_using_CNN_on_Text_an_example_of_INCREMENTAL_LEARNING.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 34,
     "output_extras": [
      {
       "item_id": 1
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 9077,
     "status": "ok",
     "timestamp": 1521106125460,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "I7Xmt_UZju47",
    "outputId": "4c8a5e21-847e-4769-cfd0-23905695dfc9"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/device:GPU:0'"
      ]
     },
     "execution_count": 1,
     "metadata": {
      "tags": []
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "tf.test.gpu_device_name()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 34,
     "output_extras": [
      {
       "item_id": 1
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 2901,
     "status": "ok",
     "timestamp": 1521106134982,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "IXAgnpJ-jyJp",
    "outputId": "fd58e686-b27b-4d1f-9d8c-da0ae7014d54"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: psutil in /usr/local/lib/python3.6/dist-packages\r\n"
     ]
    }
   ],
   "source": [
    "! pip install psutil\n",
    "import psutil\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 34,
     "output_extras": [
      {
       "item_id": 1
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 984,
     "status": "ok",
     "timestamp": 1521106137947,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "7tJw7PfXj-yQ",
    "outputId": "ac670c21-afa4-4c3f-ee47-63c87b4b965d"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.5"
      ]
     },
     "execution_count": 3,
     "metadata": {
      "tags": []
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "psutil.cpu_percent()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 54,
     "output_extras": [
      {
       "item_id": 1
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1119,
     "status": "ok",
     "timestamp": 1521106140803,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "8vad4yfckBRm",
    "outputId": "47d703db-99d9-40df-e3aa-a983e17118a6"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "svmem(total=13662167040, available=11546234880, percent=15.5, used=9500655616, free=4161511424, active=3905454080, inactive=4377460736, buffers=741838848, cached=6642884608, shared=290521088)"
      ]
     },
     "execution_count": 4,
     "metadata": {
      "tags": []
     },
     "output_type": "execute_result"
    }
   ],
   "source": [
    "psutil.virtual_memory()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 101,
     "output_extras": [
      {
       "item_id": 2
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 2562,
     "status": "ok",
     "timestamp": 1521106146744,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "hBxc7ymskDnZ",
    "outputId": "1a9bbead-c746-4817-a78f-f513d600209c"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: keras in /usr/local/lib/python3.6/dist-packages\n",
      "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from keras)\n",
      "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from keras)\n",
      "Requirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.6/dist-packages (from keras)\n",
      "Requirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.6/dist-packages (from keras)\n"
     ]
    }
   ],
   "source": [
    "! pip install keras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 34,
     "output_extras": [
      {
       "item_id": 1
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1073,
     "status": "ok",
     "timestamp": 1521106148888,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "2_Y9pGNtkFNU",
    "outputId": "919777f2-f253-4192-ffec-dbe8699db850"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import random\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "\n",
    "from datetime import datetime\n",
    "from keras.models import Model\n",
    "from keras import regularizers\n",
    "from keras.layers import Embedding\n",
    "from sklearn.metrics import f1_score\n",
    "from keras.optimizers import RMSprop\n",
    "from keras.layers.core import Flatten\n",
    "from keras.utils import to_categorical\n",
    "from keras.preprocessing.text import Tokenizer\n",
    "from sklearn.preprocessing import LabelBinarizer\n",
    "from sklearn.model_selection import train_test_split\n",
    "from keras.preprocessing.sequence import pad_sequences\n",
    "from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard\n",
    "from keras.layers import Dense, Input, Embedding, Dropout, Conv1D, Conv2D, MaxPooling1D, MaxPooling2D"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "-TjWWWtHkH5a"
   },
   "outputs": [],
   "source": [
    "def pre_processing(X, y = None, vocab_len = 26, vec_len = 400):\n",
    "    X = [i[:-1] for i in X] # Remove last character from each entry \n",
    "    alphabet = [] \n",
    "    for letter in range(97,123):\n",
    "        alphabet.append(chr(letter)) # Create a list of alphabets (lower case)\n",
    "    t = Tokenizer(char_level = True)\n",
    "    t.fit_on_texts(alphabet) # Tokenize the list of alphabets\n",
    "    seq = pad_sequences(t.texts_to_sequences(X), maxlen=vec_len, padding='post', truncating='post', value=0)\n",
    "    # Create a sequence from each data point with maximum length of 400\n",
    "    # If the entry is less than 400 character length, pad it with 0s\n",
    "    binarizer = LabelBinarizer()\n",
    "    binarizer.fit(range(26)) \n",
    "    X = np.array([binarizer.transform(x) for x in seq]) # Binarize the entries\n",
    "    if y is None:\n",
    "        return X\n",
    "    else:\n",
    "        y = [i[:-1] for i in y]\n",
    "        y = to_categorical(y) # Convert dependent variable to one-hot encoding\n",
    "        return X, y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "VRcrZfOfk_Wk"
   },
   "outputs": [],
   "source": [
    "!pip install -U -q PyDrive\n",
    "from pydrive.auth import GoogleAuth\n",
    "from pydrive.drive import GoogleDrive\n",
    "from google.colab import auth\n",
    "from oauth2client.client import GoogleCredentials\n",
    "\n",
    "auth.authenticate_user()\n",
    "gauth = GoogleAuth()\n",
    "gauth.credentials = GoogleCredentials.get_application_default()\n",
    "drive = GoogleDrive(gauth)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "BCjYGDEClCun"
   },
   "outputs": [],
   "source": [
    "file1 = drive.CreateFile({'id':'19L3AgQklbHhMO7R_4GDPdUvy8cKrOjwM'}) \n",
    "file1.GetContentFile('xtrain_obfuscated.txt')\n",
    "\n",
    "file2 = drive.CreateFile({'id':'1dmuj4QM88jD0V1XKn9WabOAkOPszWHwP'}) \n",
    "file2.GetContentFile('xtest_obfuscated.txt')\n",
    "\n",
    "file3 = drive.CreateFile({'id':'1LxKK6kxERLHOEJy3HjgZbIW1gfr9kQks'}) \n",
    "file3.GetContentFile('ytrain.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "p1sGmtIWmkY0"
   },
   "outputs": [],
   "source": [
    "file = open('xtrain_obfuscated.txt', 'r')\n",
    "X = file.readlines()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "WvCICnu5m1td"
   },
   "outputs": [],
   "source": [
    "file = open('ytrain.txt', 'r') \n",
    "y = file.readlines()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 34,
     "output_extras": [
      {
       "item_id": 1
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 24830,
     "status": "ok",
     "timestamp": 1521106303227,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "7D21YPqOmtkb",
    "outputId": "6c48b4fa-497a-42d3-a208-deb12bb359fd"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(32513, 400, 26) (32513, 12)\n"
     ]
    }
   ],
   "source": [
    "X, y = pre_processing(X, y)\n",
    "print(X.shape, y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "-m007n40mwMt"
   },
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "xlWTdrmCm8iX"
   },
   "outputs": [],
   "source": [
    "LR = 0.001\n",
    "optimizer = RMSprop(lr=LR)\n",
    "CONV = [\n",
    "    {'filters':512, 'kernel':8, 'strides':1, 'padding':'same', 'reg': 0.0, 'pool':2},\n",
    "    {'filters':512, 'kernel':8, 'strides':1, 'padding':'same', 'reg': 0.0, 'pool':2},\n",
    "    {'filters':512, 'kernel':8, 'strides':1, 'padding':'same', 'reg': 0.0, 'pool':''}\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 87,
     "output_extras": [
      {
       "item_id": 1
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1242,
     "status": "ok",
     "timestamp": 1521106332079,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "DoAt9GyXnE_q",
    "outputId": "02ea85d9-6efd-4195-999d-3fb2bb0397c9"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/deprecation.py:497: calling conv1d (from tensorflow.python.ops.nn_ops) with data_format=NHWC is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "`NHWC` for data_format is deprecated, use `NWC` instead\n"
     ]
    }
   ],
   "source": [
    "inputlayer = Input(shape=(400, 26))\n",
    "\n",
    "network = inputlayer\n",
    "\n",
    "for C in CONV:\n",
    "\n",
    "    # conv layer\n",
    "    network = Conv1D(filters=C['filters'], kernel_size=C['kernel'], \\\n",
    "                     strides=C['strides'], padding=C['padding'], activation='relu', \\\n",
    "                     kernel_regularizer=regularizers.l2(C['reg']))(network)\n",
    "\n",
    "    if type(C['pool']) != int:\n",
    "        continue\n",
    "\n",
    "    # pooling layer\n",
    "    network = MaxPooling1D(C['pool'])(network)\n",
    "\n",
    "# fully connected --------------\n",
    "# ------------------------------\n",
    "network = Flatten()(network)\n",
    "network = Dense(1024, activation='relu')(network)\n",
    "network = Dropout(0)(network)\n",
    "ypred = Dense(12, activation='softmax')(network)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "zG6KOZ1LnLor"
   },
   "outputs": [],
   "source": [
    "model = Model(inputs=inputlayer, outputs=ypred)\n",
    "model.compile(loss='categorical_crossentropy',\n",
    "              optimizer=optimizer,\n",
    "              metrics=['acc'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 471,
     "output_extras": [
      {
       "item_id": 1
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 2369,
     "status": "ok",
     "timestamp": 1521106396296,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "O1li9WJonN4k",
    "outputId": "9af74da9-a35e-460d-b66a-c843d771ee5f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "input_1 (InputLayer)         (None, 400, 26)           0         \n",
      "_________________________________________________________________\n",
      "conv1d_1 (Conv1D)            (None, 400, 512)          107008    \n",
      "_________________________________________________________________\n",
      "max_pooling1d_1 (MaxPooling1 (None, 200, 512)          0         \n",
      "_________________________________________________________________\n",
      "conv1d_2 (Conv1D)            (None, 200, 512)          2097664   \n",
      "_________________________________________________________________\n",
      "max_pooling1d_2 (MaxPooling1 (None, 100, 512)          0         \n",
      "_________________________________________________________________\n",
      "conv1d_3 (Conv1D)            (None, 100, 512)          2097664   \n",
      "_________________________________________________________________\n",
      "flatten_1 (Flatten)          (None, 51200)             0         \n",
      "_________________________________________________________________\n",
      "dense_1 (Dense)              (None, 1024)              52429824  \n",
      "_________________________________________________________________\n",
      "dropout_1 (Dropout)          (None, 1024)              0         \n",
      "_________________________________________________________________\n",
      "dense_2 (Dense)              (None, 12)                12300     \n",
      "=================================================================\n",
      "Total params: 56,744,460\n",
      "Trainable params: 56,744,460\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "7BOjlIiAnRAg"
   },
   "outputs": [],
   "source": [
    "TB_DIR = r'logs_standard_0207'\n",
    " \n",
    "#os.makedirs(TB_DIR)\n",
    "tensorboard = TensorBoard(log_dir=TB_DIR)\n",
    "\n",
    "estopping = EarlyStopping(monitor='val_acc', patience=10)\n",
    "checkpoint = ModelCheckpoint(filepath='model_0207', save_best_only=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 202,
     "output_extras": [
      {
       "item_id": 11
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 413070,
     "status": "ok",
     "timestamp": 1521106822430,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "vTc1XO4jnTrO",
    "outputId": "3b1f2d95-0ea1-4a98-c752-ee086c6d0bc1"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 26010 samples, validate on 6503 samples\n",
      "Epoch 1/5\n",
      " - 85s - loss: 13.5014 - acc: 0.1550 - val_loss: 13.5131 - val_acc: 0.1616\n",
      "Epoch 2/5\n",
      " - 80s - loss: 13.6108 - acc: 0.1556 - val_loss: 13.5131 - val_acc: 0.1616\n",
      "Epoch 3/5\n",
      " - 80s - loss: 13.6108 - acc: 0.1556 - val_loss: 13.5131 - val_acc: 0.1616\n",
      "Epoch 4/5\n",
      " - 80s - loss: 13.6108 - acc: 0.1556 - val_loss: 13.5131 - val_acc: 0.1616\n",
      "Epoch 5/5\n",
      " - 80s - loss: 13.6108 - acc: 0.1556 - val_loss: 13.5131 - val_acc: 0.1616\n"
     ]
    }
   ],
   "source": [
    "try:\n",
    "    hist = model.fit(X_train,\n",
    "                     y_train,\n",
    "                     validation_data=(X_test, y_test),\n",
    "                     epochs=5,\n",
    "                     batch_size=256,\n",
    "                     shuffle=False,\n",
    "                     verbose=2,\n",
    "                     callbacks=[checkpoint, estopping, tensorboard])\n",
    " \n",
    "except KeyboardInterrupt:    \n",
    "    print(\"training terminated by user\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 2339,
     "output_extras": [
      {
       "item_id": 7
      },
      {
       "item_id": 16
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 57934,
     "status": "ok",
     "timestamp": 1521107312624,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "McAJWiuJCJtF",
    "outputId": "591093e8-7f35-441a-d943-41b6fe1782a9"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preconfiguring packages ...\n",
      "Selecting previously unselected package cron.\n",
      "(Reading database ... 16712 files and directories currently installed.)\n",
      "Preparing to unpack .../00-cron_3.0pl1-128ubuntu5_amd64.deb ...\n",
      "Unpacking cron (3.0pl1-128ubuntu5) ...\n",
      "Selecting previously unselected package libapparmor1:amd64.\n",
      "Preparing to unpack .../01-libapparmor1_2.11.0-2ubuntu17.1_amd64.deb ...\n",
      "Unpacking libapparmor1:amd64 (2.11.0-2ubuntu17.1) ...\n",
      "Selecting previously unselected package libdbus-1-3:amd64.\n",
      "Preparing to unpack .../02-libdbus-1-3_1.10.22-1ubuntu1_amd64.deb ...\n",
      "Unpacking libdbus-1-3:amd64 (1.10.22-1ubuntu1) ...\n",
      "Selecting previously unselected package dbus.\n",
      "Preparing to unpack .../03-dbus_1.10.22-1ubuntu1_amd64.deb ...\n",
      "Unpacking dbus (1.10.22-1ubuntu1) ...\n",
      "Selecting previously unselected package dirmngr.\n",
      "Preparing to unpack .../04-dirmngr_2.1.15-1ubuntu8_amd64.deb ...\n",
      "Unpacking dirmngr (2.1.15-1ubuntu8) ...\n",
      "Selecting previously unselected package distro-info-data.\n",
      "Preparing to unpack .../05-distro-info-data_0.36ubuntu0.1_all.deb ...\n",
      "Unpacking distro-info-data (0.36ubuntu0.1) ...\n",
      "Selecting previously unselected package libkmod2:amd64.\n",
      "Preparing to unpack .../06-libkmod2_24-1ubuntu2_amd64.deb ...\n",
      "Unpacking libkmod2:amd64 (24-1ubuntu2) ...\n",
      "Selecting previously unselected package kmod.\n",
      "Preparing to unpack .../07-kmod_24-1ubuntu2_amd64.deb ...\n",
      "Unpacking kmod (24-1ubuntu2) ...\n",
      "Selecting previously unselected package lsb-release.\n",
      "Preparing to unpack .../08-lsb-release_9.20160110ubuntu5_all.deb ...\n",
      "Unpacking lsb-release (9.20160110ubuntu5) ...\n",
      "Selecting previously unselected package libgirepository-1.0-1:amd64.\n",
      "Preparing to unpack .../09-libgirepository-1.0-1_1.54.1-1_amd64.deb ...\n",
      "Unpacking libgirepository-1.0-1:amd64 (1.54.1-1) ...\n",
      "Selecting previously unselected package gir1.2-glib-2.0:amd64.\n",
      "Preparing to unpack .../10-gir1.2-glib-2.0_1.54.1-1_amd64.deb ...\n",
      "Unpacking gir1.2-glib-2.0:amd64 (1.54.1-1) ...\n",
      "Selecting previously unselected package iso-codes.\n",
      "Preparing to unpack .../11-iso-codes_3.75-1_all.deb ...\n",
      "Unpacking iso-codes (3.75-1) ...\n",
      "Selecting previously unselected package libdbus-glib-1-2:amd64.\n",
      "Preparing to unpack .../12-libdbus-glib-1-2_0.108-2_amd64.deb ...\n",
      "Unpacking libdbus-glib-1-2:amd64 (0.108-2) ...\n",
      "Selecting previously unselected package python-apt-common.\n",
      "Preparing to unpack .../13-python-apt-common_1.4.0~beta3build2_all.deb ...\n",
      "Unpacking python-apt-common (1.4.0~beta3build2) ...\n",
      "Selecting previously unselected package python3-apt.\n",
      "Preparing to unpack .../14-python3-apt_1.4.0~beta3build2_amd64.deb ...\n",
      "Unpacking python3-apt (1.4.0~beta3build2) ...\n",
      "Selecting previously unselected package python3-dbus.\n",
      "Preparing to unpack .../15-python3-dbus_1.2.4-1build3_amd64.deb ...\n",
      "Unpacking python3-dbus (1.2.4-1build3) ...\n",
      "Selecting previously unselected package python3-gi.\n",
      "Preparing to unpack .../16-python3-gi_3.24.1-2build1_amd64.deb ...\n",
      "Unpacking python3-gi (3.24.1-2build1) ...\n",
      "Selecting previously unselected package module-init-tools.\n",
      "Preparing to unpack .../17-module-init-tools_24-1ubuntu2_all.deb ...\n",
      "Unpacking module-init-tools (24-1ubuntu2) ...\n",
      "Selecting previously unselected package python-apt.\n",
      "Preparing to unpack .../18-python-apt_1.4.0~beta3build2_amd64.deb ...\n",
      "Unpacking python-apt (1.4.0~beta3build2) ...\n",
      "Selecting previously unselected package python-pycurl.\n",
      "Preparing to unpack .../19-python-pycurl_7.43.0-2build2_amd64.deb ...\n",
      "Unpacking python-pycurl (7.43.0-2build2) ...\n",
      "Selecting previously unselected package python-software-properties.\n",
      "Preparing to unpack .../20-python-software-properties_0.96.24.17_all.deb ...\n",
      "Unpacking python-software-properties (0.96.24.17) ...\n",
      "Selecting previously unselected package python3-software-properties.\n",
      "Preparing to unpack .../21-python3-software-properties_0.96.24.17_all.deb ...\n",
      "Unpacking python3-software-properties (0.96.24.17) ...\n",
      "Selecting previously unselected package software-properties-common.\n",
      "Preparing to unpack .../22-software-properties-common_0.96.24.17_all.deb ...\n",
      "Unpacking software-properties-common (0.96.24.17) ...\n",
      "Selecting previously unselected package unattended-upgrades.\n",
      "Preparing to unpack .../23-unattended-upgrades_0.98ubuntu1.1_all.deb ...\n",
      "Unpacking unattended-upgrades (0.98ubuntu1.1) ...\n",
      "Setting up python-apt-common (1.4.0~beta3build2) ...\n",
      "Setting up python3-apt (1.4.0~beta3build2) ...\n",
      "Setting up iso-codes (3.75-1) ...\n",
      "Setting up distro-info-data (0.36ubuntu0.1) ...\n",
      "Setting up python-pycurl (7.43.0-2build2) ...\n",
      "Setting up lsb-release (9.20160110ubuntu5) ...\n",
      "Setting up libgirepository-1.0-1:amd64 (1.54.1-1) ...\n",
      "Setting up libkmod2:amd64 (24-1ubuntu2) ...\n",
      "Setting up gir1.2-glib-2.0:amd64 (1.54.1-1) ...\n",
      "Processing triggers for libc-bin (2.26-0ubuntu2.1) ...\n",
      "Setting up libapparmor1:amd64 (2.11.0-2ubuntu17.1) ...\n",
      "Setting up unattended-upgrades (0.98ubuntu1.1) ...\n",
      "\n",
      "Creating config file /etc/apt/apt.conf.d/20auto-upgrades with new version\n",
      "\n",
      "Creating config file /etc/apt/apt.conf.d/50unattended-upgrades with new version\n",
      "invoke-rc.d: could not determine current runlevel\n",
      "invoke-rc.d: policy-rc.d denied execution of start.\n",
      "Setting up dirmngr (2.1.15-1ubuntu8) ...\n",
      "Setting up cron (3.0pl1-128ubuntu5) ...\n",
      "Adding group `crontab' (GID 102) ...\n",
      "Done.\n",
      "update-rc.d: warning: start and stop actions are no longer supported; falling back to defaults\n",
      "update-rc.d: warning: stop runlevel arguments (1) do not match cron Default-Stop values (none)\n",
      "invoke-rc.d: could not determine current runlevel\n",
      "invoke-rc.d: policy-rc.d denied execution of start.\n",
      "Setting up libdbus-1-3:amd64 (1.10.22-1ubuntu1) ...\n",
      "Setting up kmod (24-1ubuntu2) ...\n",
      "Setting up libdbus-glib-1-2:amd64 (0.108-2) ...\n",
      "Setting up python3-gi (3.24.1-2build1) ...\n",
      "Setting up module-init-tools (24-1ubuntu2) ...\n",
      "Setting up python3-software-properties (0.96.24.17) ...\n",
      "Setting up dbus (1.10.22-1ubuntu1) ...\n",
      "Setting up python-apt (1.4.0~beta3build2) ...\n",
      "Setting up python3-dbus (1.2.4-1build3) ...\n",
      "Setting up python-software-properties (0.96.24.17) ...\n",
      "Setting up software-properties-common (0.96.24.17) ...\n",
      "Processing triggers for libc-bin (2.26-0ubuntu2.1) ...\n",
      "Processing triggers for dbus (1.10.22-1ubuntu1) ...\n",
      "gpg: keybox '/tmp/tmpva6atlhl/pubring.gpg' created\n",
      "gpg: /tmp/tmpva6atlhl/trustdb.gpg: trustdb created\n",
      "gpg: key AD5F235DF639B041: public key \"Launchpad PPA for Alessandro Strada\" imported\n",
      "gpg: Total number processed: 1\n",
      "gpg:               imported: 1\n",
      "Warning: apt-key output should not be parsed (stdout is not a terminal)\n",
      "Selecting previously unselected package libfuse2:amd64.\n",
      "(Reading database ... 18120 files and directories currently installed.)\n",
      "Preparing to unpack .../libfuse2_2.9.7-1ubuntu1_amd64.deb ...\n",
      "Unpacking libfuse2:amd64 (2.9.7-1ubuntu1) ...\n",
      "Selecting previously unselected package fuse.\n",
      "Preparing to unpack .../fuse_2.9.7-1ubuntu1_amd64.deb ...\n",
      "Unpacking fuse (2.9.7-1ubuntu1) ...\n",
      "Selecting previously unselected package google-drive-ocamlfuse.\n",
      "Preparing to unpack .../google-drive-ocamlfuse_0.6.21-0ubuntu2_amd64.deb ...\n",
      "Unpacking google-drive-ocamlfuse (0.6.21-0ubuntu2) ...\n",
      "Setting up libfuse2:amd64 (2.9.7-1ubuntu1) ...\n",
      "Processing triggers for libc-bin (2.26-0ubuntu2.1) ...\n",
      "Setting up fuse (2.9.7-1ubuntu1) ...\n",
      "Setting up google-drive-ocamlfuse (0.6.21-0ubuntu2) ...\n",
      "Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force\n",
      "··········\n",
      "Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force\n",
      "Please enter the verification code: Access token retrieved correctly.\n"
     ]
    }
   ],
   "source": [
    "# Mount Google Drive as a local drive (via FUSE) so the trained model can be saved to it.\n",
    "# Wrapper used: https://github.com/astrada/google-drive-ocamlfuse\n",
    "import getpass\n",
    "\n",
    "from google.colab import auth\n",
    "from oauth2client.client import GoogleCredentials\n",
    "\n",
    "# Install the Drive FUSE wrapper. '2>&1' must come AFTER '> /dev/null';\n",
    "# in the opposite order only stdout is discarded and stderr still reaches the cell output.\n",
    "!apt-get install -y -qq software-properties-common python-software-properties module-init-tools\n",
    "!add-apt-repository -y ppa:alessandro-strada/ppa > /dev/null 2>&1\n",
    "!apt-get update -qq > /dev/null 2>&1\n",
    "!apt-get -y install -qq google-drive-ocamlfuse fuse\n",
    "\n",
    "# Generate auth tokens for Colab.\n",
    "auth.authenticate_user()\n",
    "\n",
    "# Generate creds for the Drive FUSE library.\n",
    "creds = GoogleCredentials.get_application_default()\n",
    "\n",
    "# First pass prints the authorization URL; open it and copy the verification code.\n",
    "!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL\n",
    "# getpass reads the pasted code without echoing it into the notebook output.\n",
    "vcode = getpass.getpass()\n",
    "# Second pass feeds the code to the wrapper on stdin to complete the authorization.\n",
    "!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 1479,
     "output_extras": [
      {
       "item_id": 2
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 3870,
     "status": "ok",
     "timestamp": 1521109512640,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "ykZIgTWTCZED",
    "outputId": "c64b7152-a0eb-4de3-8e2b-133f41553033"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fuse: mountpoint is not empty\r\n",
      "fuse: if you are sure this is safe, use the 'nonempty' mount option\n",
      "1.JPG\n",
      "2.jpg\n",
      "3.JPG\n",
      "4.JPG\n",
      "5.jpg\n",
      "ADD.pdf\n",
      "ad_target.R\n",
      "Agile Data Science.pdf\n",
      "Airbnb.rar\n",
      "Application Form1.pdf\n",
      "ARM_loc_v6.R\n",
      "bookofruby.pdf\n",
      "certificates\n",
      "Ch05.pdf\n",
      "Colab Notebooks\n",
      "control-m.pdf\n",
      "Copy of Toxic_comment_practice (0e3265c5).ipynb\n",
      "Copy of Toxic_comment_practice (55627396).ipynb\n",
      "Copy of Toxic_comment_practice (5796848e).ipynb\n",
      "Copy of Toxic_comment_practice.ipynb\n",
      "Copy of Untitled0.ipynb\n",
      "Copy of Word2vec_practice.ipynb\n",
      "Data Analysis new data.xlsx\n",
      "Data Mining - Practical Machine Learning Tools and Techniques (3rd Ed).pdf\n",
      "Data_prep_for_classification.R\n",
      "Data Science for Business.pdf\n",
      "Doc2vec_practice.ipynb\n",
      "Ernest\n",
      "ESLII_print10.pdf\n",
      "Freelancing and code\n",
      "IMG_20160413_211734711_TOP.jpg\n",
      "IMG_20160413_211751358.jpg\n",
      "IMG_20160413_211814479.jpg\n",
      "IMG_20160416_200834362.jpg\n",
      "IMG_20160422_192704832.jpg\n",
      "IMG_20160505_010152450.jpg\n",
      "IMG_20160505_010254430_TOP.jpg\n",
      "IMG_20160505_012743.jpg\n",
      "IMG_20160505_012858.jpg\n",
      "IMG_20160505_014627.jpg\n",
      "IMG_20160505_114840.jpg\n",
      "IMG_20160505_240421666.jpg\n",
      "IMG_20160505_240821561.jpg\n",
      "IMG_20160505_240900701.jpg\n",
      "IMG_20160505_240915959.jpg\n",
      "IMG_20160505_241100840.jpg\n",
      "IMG_20160505_243543933.jpg\n",
      "IMG_20160506_104058.jpg\n",
      "Intoduction to Statistical Learning R Sixth Printing.pdf\n",
      "Invoice OD40402089038.pdf\n",
      "kiranresume.docx.odt\n",
      "kiranresume.docx.odt (55d1f6db)\n",
      "Lab.docx.odt\n",
      "LCTASQL.pdf\n",
      "lesson5-movielens.ipynb\n",
      "Machine Learning in Action.pdf\n",
      "memories.mp4\n",
      "-MKT_002_01032017.csv\n",
      "ModelSummary\n",
      "OpenIntroStatsThirdEdition.pdf\n",
      "OReilly.Head.First.GetIndianStuff.com.HTML.With.CSS.XHTML.pdf\n",
      "OReilly.Python.For.Data.Analysis.Oct.2012.ISBN.1449319793.pdf\n",
      "Passport_pic.pdf\n",
      "Prabhukiran.jpg\n",
      "prabhu.zip\n",
      "Projects_and_research\n",
      "Prospectus_2010.pdf\n",
      "Ratings.ods\n",
      "resume\n",
      "RESUME.doc.odt\n",
      "RESUME.pdf\n",
      "Saloni Certificates\n",
      "SAP Author classification using CNN on Text.ipynb\n",
      "Screenshot from 2013-03-03 23:17:33.png\n",
      "signature.jpg\n",
      "slides6.pdf\n",
      "sms-20161006094626.xml\n",
      "sms.xsl\n",
      "Spark Training _ Harman\n",
      "sql_korth.pdf\n",
      "student details to forward.ods\n",
      "student details to forward.pdf\n",
      "Toxic_comment_practice_v2.ipynb\n",
      "Training_DS_R_Subbu\n",
      "xgboost jars\n"
     ]
    }
   ],
   "source": [
    "# Create a directory 'MyModelSummary' and mount Google Drive on it.\n",
    "!mkdir -p MyModelSummary\n",
    "# Mount only when nothing is mounted there yet; re-running the cell otherwise fails with\n",
    "# \"fuse: mountpoint is not empty\".\n",
    "!mountpoint -q MyModelSummary || google-drive-ocamlfuse MyModelSummary\n",
    "# Sanity check: show a few entries only, so the whole Drive listing is not stored in the notebook.\n",
    "!ls MyModelSummary/ | head -n 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "peWqxm6sEnUR"
   },
   "outputs": [],
   "source": [
    "# Create a directory in the mounted Drive for the saved model. The saved file holds the model specification:\n",
    "# 1. the architecture of the model, allowing to re-create the model\n",
    "# 2. the weights of the model\n",
    "# 3. the training configuration (loss, optimizer)\n",
    "# 4. the state of the optimizer, allowing to resume training exactly where you left off.\n",
    "# -p: do not fail when the directory already exists (e.g. when this cell is re-run).\n",
    "!mkdir -p /content/MyModelSummary/ModelSummary/\n",
    "!chmod 777 /content/MyModelSummary/ModelSummary/\n",
    "\n",
    "model.save('/content/MyModelSummary/ModelSummary/model.h5')  # creates the HDF5 file 'model.h5'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 471,
     "output_extras": [
      {
       "item_id": 1
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 6929,
     "status": "ok",
     "timestamp": 1521109270294,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "GVgLGIRH6fV0",
    "outputId": "cb885229-47ca-4fe8-cbf8-f39a0e04ed99"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "input_1 (InputLayer)         (None, 400, 26)           0         \n",
      "_________________________________________________________________\n",
      "conv1d_1 (Conv1D)            (None, 400, 512)          107008    \n",
      "_________________________________________________________________\n",
      "max_pooling1d_1 (MaxPooling1 (None, 200, 512)          0         \n",
      "_________________________________________________________________\n",
      "conv1d_2 (Conv1D)            (None, 200, 512)          2097664   \n",
      "_________________________________________________________________\n",
      "max_pooling1d_2 (MaxPooling1 (None, 100, 512)          0         \n",
      "_________________________________________________________________\n",
      "conv1d_3 (Conv1D)            (None, 100, 512)          2097664   \n",
      "_________________________________________________________________\n",
      "flatten_1 (Flatten)          (None, 51200)             0         \n",
      "_________________________________________________________________\n",
      "dense_1 (Dense)              (None, 1024)              52429824  \n",
      "_________________________________________________________________\n",
      "dropout_1 (Dropout)          (None, 1024)              0         \n",
      "_________________________________________________________________\n",
      "dense_2 (Dense)              (None, 12)                12300     \n",
      "=================================================================\n",
      "Total params: 56,744,460\n",
      "Trainable params: 56,744,460\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "# On later runs, restore the saved model from Drive with the lines below and continue from there.\n",
    "from keras.models import load_model\n",
    "\n",
    "# load_model returns the compiled model, identical to the one saved in the previous run.\n",
    "saved_model_path = '/content/MyModelSummary/ModelSummary/model.h5'\n",
    "model_2 = load_model(saved_model_path)\n",
    "model_2.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "base_uri": "https://localhost:8080/",
     "height": 101,
     "output_extras": [
      {
       "item_id": 3
      }
     ]
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 162236,
     "status": "ok",
     "timestamp": 1521108974183,
     "user": {
      "displayName": "prabhukiran g",
      "photoUrl": "//lh4.googleusercontent.com/-BYx7PrXEoDk/AAAAAAAAAAI/AAAAAAAAHQ4/Rp2XJAt4a9w/s50-c-k-no/photo.jpg",
      "userId": "111602092102230767926"
     },
     "user_tz": -330
    },
    "id": "24QrlH9BHMt9",
    "outputId": "d812b5a6-f45a-49a8-ea60-76ecc9ca75fa"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 26010 samples, validate on 6503 samples\n",
      "Epoch 1/2\n",
      " - 81s - loss: 13.6108 - acc: 0.1556 - val_loss: 13.5131 - val_acc: 0.1616\n",
      "Epoch 2/2\n",
      " - 80s - loss: 13.6108 - acc: 0.1556 - val_loss: 13.5131 - val_acc: 0.1616\n"
     ]
    }
   ],
   "source": [
    "# Continue training from the previously saved checkpoint (model_2).\n",
    "fit_options = dict(\n",
    "    validation_data=(X_test, y_test),\n",
    "    epochs=2,\n",
    "    batch_size=256,\n",
    "    shuffle=False,\n",
    "    verbose=2,\n",
    "    callbacks=[checkpoint, estopping, tensorboard],\n",
    ")\n",
    "\n",
    "try:\n",
    "    hist = model_2.fit(X_train, y_train, **fit_options)\n",
    "except KeyboardInterrupt:\n",
    "    # A manual interrupt ends training with a message instead of a traceback.\n",
    "    print(\"training terminated by user\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "id": "0DbkANHRH7Au"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "default_view": {},
   "name": "SAP Author classification using CNN on Text.ipynb",
   "provenance": [],
   "version": "0.3.2",
   "views": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}