Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@alfredfrancis
Last active May 14, 2018 17:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alfredfrancis/fe61fdae2c71773e86c0d26d6e336dc0 to your computer and use it in GitHub Desktop.
Save alfredfrancis/fe61fdae2c71773e86c0d26d6e336dc0 to your computer and use it in GitHub Desktop.
intent_classification.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "intent_classification.ipynb",
"version": "0.3.2",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"[View in Colaboratory](https://colab.research.google.com/gist/alfredfrancis/fe61fdae2c71773e86c0d26d6e336dc0/intent_classification.ipynb)"
]
},
{
"metadata": {
"id": "9e23lvSDeEik",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"outputId": "ce309c38-a75e-42e3-9ccb-ab8b8a66e4df"
},
"cell_type": "code",
"source": [
"# Install dependencies (Colab).\n",
"# - The PyPI project is `scikit-learn`; `sklearn` is only a deprecated placeholder name.\n",
"# - spaCy is pinned to the 2.0 series: the `en` shortcut and the en_core_web_sm-2.0.0 model\n",
"#   used by this notebook belong to that release line.\n",
"!pip install -q -U tensorflow==1.8.0\n",
"!pip install -q -U scikit-learn\n",
"!pip install -q -U numpy\n",
"!pip install -q -U 'spacy==2.0.*'\n",
"!python -m spacy download en\n"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz\n",
"\u001b[?25l Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz (37.4MB)\n",
"\u001b[K 100% |████████████████████████████████| 37.4MB 7.0MB/s \n",
"\u001b[?25hRequirement already satisfied (use --upgrade to upgrade): en-core-web-sm==2.0.0 from https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz in /usr/local/lib/python3.6/dist-packages\n",
"\n",
"\u001b[93m Linking successful\u001b[0m\n",
" /usr/local/lib/python3.6/dist-packages/en_core_web_sm -->\n",
" /usr/local/lib/python3.6/dist-packages/spacy/data/en\n",
"\n",
" You can now load the model via spacy.load('en')\n",
"\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "3D-pNUb7e3bI",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"import spacy\n",
"nlp = spacy.load('en')"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "WZU7wRH3e6HJ",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"\n",
"#training data\n",
"\n",
"X =[u'i want to cancel', u'cancel that', u'cancel', u'im looking for a place in banglore serving Chinese', u\"i'm looking for Chinese food\", u\"I'm looking for south indian places\", u'im looking for a place near banglore', u\"i'm looking for a place to eat near down town la\", u\"i'm looking for a place in new york\", u'im looking for a place in banglore', u'looking for indian cuisine in new york', u'central indian restaurant', u'I am looking for mexican indian fusion', u'I am looking a restaurant in 29432', u'I am looking for asian fusion food', u'anywhere near 18328', u'anywhere in the west', u'search for restaurants', u'i am looking for an indian spot called olaolaolaolaolaola', u'show me a mexican place in the centre', u'show me chines restaurants in the north', u'show me chinese restaurants', u\"i'm looking for a place in the north of town\", u'I am searching for a dinner spot', u'I want to grab lunch', u\"i'm looking for a place to eat\", u'dear sir', u'good evening', u'good morning', u'hi', u'hello', u'hey there', u'howdy', u'hey', u'sounds really good', u'great choice', u'correct', u'right, thank you', u'great', u'ok', u\"that's right\", u'indeed', u'yeah', u'yep', u'yes', u'have a good one', u'Bye bye', u'farewell', u'end', u'stop', u'good bye', u'goodbye', u'bye', u'thank you iky', u'thanks', u'thank you very much']\n",
"y = ['cancel', 'cancel', 'cancel', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'restaurant_search', 'greet', 'greet', 'greet', 'greet', 'greet', 'greet', 'greet', 'greet', 'affirm', 'affirm', 'affirm', 'affirm', 'affirm', 'affirm', 'affirm', 'affirm', 'affirm', 'affirm', 'affirm', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'thank_you', 'thank_you', 'thank_you']\n"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "7A9cUsD62Ivg",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 238
},
"outputId": "259d0e3e-deb4-4679-f304-3854e7c4c33d"
},
"cell_type": "code",
"source": [
"# One-hot (bag-of-words) features -- alternative input encoding; overwritten by the spaCy vectors in the next cell\n",
"\n",
"vocab_size = 300\n",
"tokenize = tf.keras.preprocessing.text.Tokenizer(num_words=vocab_size)\n",
"tokenize.fit_on_texts(X)\n",
"x_train = tokenize.texts_to_matrix(X)\n",
"print(x_train.shape)\n",
"print(x_train)\n"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz\n",
"\u001b[?25l Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz (37.4MB)\n",
"\u001b[K 100% |████████████████████████████████| 37.4MB 60.1MB/s \n",
"\u001b[?25hInstalling collected packages: en-core-web-sm\n",
" Running setup.py install for en-core-web-sm ... \u001b[?25l-\b \b\\\b \b|\b \bdone\n",
"\u001b[?25hSuccessfully installed en-core-web-sm-2.0.0\n",
"\n",
"\u001b[93m Linking successful\u001b[0m\n",
" /usr/local/lib/python3.6/dist-packages/en_core_web_sm -->\n",
" /usr/local/lib/python3.6/dist-packages/spacy/data/en\n",
"\n",
" You can now load the model via spacy.load('en')\n",
"\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "Q5xMG4hffrY3",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 255
},
"outputId": "145c94b1-470e-4955-fccd-2cf8bf57e1c4"
},
"cell_type": "code",
"source": [
"# Build the feature matrix: one spaCy document vector per training utterance.\n",
"x_train = np.array([nlp(text).vector for text in X])\n",
"\n",
"# Width of the feature vectors (the network's input size). It is read from the data rather\n",
"# than hard-coded as 384, so the notebook keeps working if the spaCy model's vector size differs.\n",
"vocab_size = x_train.shape[1]\n",
"\n",
"print(x_train)\n",
"print(x_train.shape)\n"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"[[ 0.7439925 0.47500432 1.7174635 ... -0.10623532 0.20589437\n",
" -0.0548238 ]\n",
" [ 2.7006192 0.6802906 1.6995202 ... -0.07486539 0.09393249\n",
" -0.14652586]\n",
" [-1.1281996 1.4189541 3.0053968 ... -0.5013096 0.25840303\n",
" -0.4108916 ]\n",
" ...\n",
" [-0.09876251 -1.1779536 2.6216795 ... 0.6765316 0.06103614\n",
" -0.01082057]\n",
" [ 1.155243 3.1034093 3.712947 ... 0.3397486 0.12243246\n",
" 0.24622968]\n",
" [ 0.18381912 -1.6224215 -0.3685107 ... 0.3254633 -0.01642913\n",
" 0.05653178]]\n",
"(56, 384)\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "YoFvAXxdg4_X",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "fde3f3f9-f0c3-4289-8079-4f0bd4302fd9"
},
"cell_type": "code",
"source": [
"from sklearn.preprocessing import LabelBinarizer\n",
"\n",
"# Number of distinct intent labels; used as the size of the model's output layer.\n",
"num_labels = len(set(y))\n",
"print(num_labels)\n",
"\n",
"# One-hot encode the string labels (one column per class, ordered like encoder.classes_).\n",
"encoder = LabelBinarizer()\n",
"y_train = encoder.fit_transform(y)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"6\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "rUEGgT4TiASh",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 238
},
"outputId": "a862374a-7712-48e7-ba60-ab25878a4cda"
},
"cell_type": "code",
"source": [
"# Feed-forward intent classifier built with the Keras Sequential API:\n",
"# input vector -> Dense(512) -> Dense(256) -> softmax over the intent labels.\n",
"model = tf.keras.Sequential([\n",
"    tf.keras.layers.Dense(512, activation=tf.nn.relu, input_shape=(vocab_size,)),\n",
"    tf.keras.layers.Dense(256, activation=tf.nn.relu),\n",
"    tf.keras.layers.Dense(num_labels, activation=tf.nn.softmax),\n",
"])\n",
"\n",
"model.compile(optimizer='adam',\n",
"              loss='categorical_crossentropy',\n",
"              metrics=['accuracy'])\n",
"\n",
"model.summary()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"dense_35 (Dense) (None, 512) 197120 \n",
"_________________________________________________________________\n",
"dense_36 (Dense) (None, 6) 3078 \n",
"_________________________________________________________________\n",
"dense_37 (Dense) (None, 6) 42 \n",
"=================================================================\n",
"Total params: 200,240\n",
"Trainable params: 200,240\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "JGeQ61o6pRyF",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Alternative architecture (overwrites `model`): treat each component of the input vector\n",
"# as one timestep of a length-`vocab_size` sequence and run a bidirectional LSTM over it.\n",
"input_shape = (vocab_size,)\n",
"model = tf.keras.Sequential()\n",
"# (vocab_size,) -> (vocab_size, 1): recurrent layers need a (timesteps, features) input.\n",
"model.add(tf.keras.layers.Reshape(input_shape + (1,), input_shape=input_shape))\n",
"# No input_shape here: it is only meaningful on the first layer of the model.\n",
"model.add(tf.keras.layers.Dense(30, activation=tf.nn.relu))\n",
"model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20, activation=tf.nn.relu, return_sequences=False)))\n",
"# softmax (not sigmoid): the targets are one-hot and the loss is categorical_crossentropy,\n",
"# so the outputs should form a probability distribution over the intents.\n",
"model.add(tf.keras.layers.Dense(num_labels, activation=tf.nn.softmax))\n",
"model.compile(loss='categorical_crossentropy',\n",
"              optimizer='adam',\n",
"              metrics=['accuracy'])\n"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "Uxo8hFh8pU2i",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Alternative architecture (overwrites `model`): a single LSTM over the reshaped input vector.\n",
"model = tf.keras.Sequential()\n",
"# (vocab_size,) -> (vocab_size, 1): the LSTM needs a 3-D (batch, timesteps, features) input.\n",
"model.add(tf.keras.layers.Reshape((vocab_size, 1), input_shape=(vocab_size,)))\n",
"model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))\n",
"# Layer classes live in tf.keras.layers (tf.keras.LSTM / tf.keras.Dense do not exist).\n",
"model.add(tf.keras.layers.LSTM(100, dropout=0.2, recurrent_dropout=0.2))\n",
"# One softmax unit per intent so the output matches the one-hot y_train.\n",
"model.add(tf.keras.layers.Dense(num_labels, activation='softmax'))\n",
"# Compile once, with the multi-class loss that matches the one-hot targets.\n",
"model.compile(loss='categorical_crossentropy',\n",
"              optimizer='adam',\n",
"              metrics=['accuracy'])"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "NVecGz-5pl-v",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"# Alternative architecture: Embedding -> Conv1D -> max-pooling -> LSTM.\n",
"# NOTE: this model consumes integer token ids padded to length 50 (e.g. produced with\n",
"# tokenize.texts_to_sequences + pad_sequences), not the spaCy vectors held in x_train.\n",
"vocabulary_size = tokenize.num_words  # vocabulary limit of the Tokenizer fitted in the one-hot cell\n",
"\n",
"model_conv = tf.keras.Sequential()\n",
"model_conv.add(tf.keras.layers.Embedding(vocabulary_size, 100, input_length=50))\n",
"model_conv.add(tf.keras.layers.Dropout(0.2))\n",
"model_conv.add(tf.keras.layers.Conv1D(64, 5, activation='relu'))\n",
"model_conv.add(tf.keras.layers.MaxPooling1D(pool_size=4))\n",
"model_conv.add(tf.keras.layers.LSTM(100))\n",
"# One softmax unit per intent so the output matches the one-hot y_train.\n",
"model_conv.add(tf.keras.layers.Dense(num_labels, activation='softmax'))\n",
"# Compile model_conv itself (not `model`) with the multi-class loss.\n",
"model_conv.compile(loss='categorical_crossentropy',\n",
"                   optimizer='adam',\n",
"                   metrics=['accuracy'])"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "Zw97Xw-Qifnn",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1734
},
"outputId": "2dc399e3-ac01-44d2-c2be-cb85e64dfa94"
},
"cell_type": "code",
"source": [
"# Train the currently defined `model` on the full training set for 50 epochs,\n",
"# reshuffling the samples before every epoch.\n",
"model.fit(x_train, y_train, epochs=50, shuffle=True, verbose=1)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Epoch 1/50\n",
"56/56 [==============================] - 0s 444us/step - loss: 0.0328 - acc: 1.0000\n",
"Epoch 2/50\n",
"56/56 [==============================] - 0s 476us/step - loss: 0.0324 - acc: 1.0000\n",
"Epoch 3/50\n",
"56/56 [==============================] - 0s 430us/step - loss: 0.0321 - acc: 1.0000\n",
"Epoch 4/50\n",
"56/56 [==============================] - 0s 361us/step - loss: 0.0319 - acc: 1.0000\n",
"Epoch 5/50\n",
"56/56 [==============================] - 0s 307us/step - loss: 0.0317 - acc: 1.0000\n",
"Epoch 6/50\n",
"56/56 [==============================] - 0s 317us/step - loss: 0.0315 - acc: 1.0000\n",
"Epoch 7/50\n",
"56/56 [==============================] - 0s 341us/step - loss: 0.0314 - acc: 1.0000\n",
"Epoch 8/50\n",
"56/56 [==============================] - 0s 306us/step - loss: 0.0312 - acc: 1.0000\n",
"Epoch 9/50\n",
"56/56 [==============================] - 0s 444us/step - loss: 0.0311 - acc: 1.0000\n",
"Epoch 10/50\n",
"56/56 [==============================] - 0s 353us/step - loss: 0.0309 - acc: 1.0000\n",
"Epoch 11/50\n",
"56/56 [==============================] - 0s 324us/step - loss: 0.0308 - acc: 1.0000\n",
"Epoch 12/50\n",
"56/56 [==============================] - 0s 319us/step - loss: 0.0307 - acc: 1.0000\n",
"Epoch 13/50\n",
"56/56 [==============================] - 0s 314us/step - loss: 0.0306 - acc: 1.0000\n",
"Epoch 14/50\n",
"56/56 [==============================] - 0s 394us/step - loss: 0.0715 - acc: 0.9821\n",
"Epoch 15/50\n",
"56/56 [==============================] - 0s 390us/step - loss: 0.0308 - acc: 1.0000\n",
"Epoch 16/50\n",
"56/56 [==============================] - 0s 473us/step - loss: 0.0306 - acc: 1.0000\n",
"Epoch 17/50\n",
"56/56 [==============================] - 0s 362us/step - loss: 0.0304 - acc: 1.0000\n",
"Epoch 18/50\n",
"56/56 [==============================] - 0s 362us/step - loss: 0.0303 - acc: 1.0000\n",
"Epoch 19/50\n",
"56/56 [==============================] - 0s 431us/step - loss: 0.0303 - acc: 1.0000\n",
"Epoch 20/50\n",
"56/56 [==============================] - 0s 322us/step - loss: 0.0302 - acc: 1.0000\n",
"Epoch 21/50\n",
"56/56 [==============================] - 0s 313us/step - loss: 0.0301 - acc: 1.0000\n",
"Epoch 22/50\n",
"56/56 [==============================] - 0s 275us/step - loss: 0.0300 - acc: 1.0000\n",
"Epoch 23/50\n",
"56/56 [==============================] - 0s 343us/step - loss: 0.0300 - acc: 1.0000\n",
"Epoch 24/50\n",
"56/56 [==============================] - 0s 349us/step - loss: 0.0299 - acc: 1.0000\n",
"Epoch 25/50\n",
"56/56 [==============================] - 0s 344us/step - loss: 0.0299 - acc: 1.0000\n",
"Epoch 26/50\n",
"56/56 [==============================] - 0s 284us/step - loss: 0.0298 - acc: 1.0000\n",
"Epoch 27/50\n",
"56/56 [==============================] - 0s 285us/step - loss: 0.0297 - acc: 1.0000\n",
"Epoch 28/50\n",
"56/56 [==============================] - 0s 330us/step - loss: 0.0297 - acc: 1.0000\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"Epoch 29/50\n",
"56/56 [==============================] - 0s 322us/step - loss: 0.0296 - acc: 1.0000\n",
"Epoch 30/50\n",
"56/56 [==============================] - 0s 306us/step - loss: 0.0296 - acc: 1.0000\n",
"Epoch 31/50\n",
"56/56 [==============================] - 0s 347us/step - loss: 0.0295 - acc: 1.0000\n",
"Epoch 32/50\n",
"56/56 [==============================] - 0s 339us/step - loss: 0.0295 - acc: 1.0000\n",
"Epoch 33/50\n",
"56/56 [==============================] - 0s 319us/step - loss: 0.0294 - acc: 1.0000\n",
"Epoch 34/50\n",
"56/56 [==============================] - 0s 332us/step - loss: 0.0294 - acc: 1.0000\n",
"Epoch 35/50\n",
"56/56 [==============================] - 0s 327us/step - loss: 0.0293 - acc: 1.0000\n",
"Epoch 36/50\n",
"56/56 [==============================] - 0s 378us/step - loss: 0.0293 - acc: 1.0000\n",
"Epoch 37/50\n",
"56/56 [==============================] - 0s 340us/step - loss: 0.0292 - acc: 1.0000\n",
"Epoch 38/50\n",
"56/56 [==============================] - 0s 282us/step - loss: 0.0292 - acc: 1.0000\n",
"Epoch 39/50\n",
"56/56 [==============================] - 0s 330us/step - loss: 0.0292 - acc: 1.0000\n",
"Epoch 40/50\n",
"56/56 [==============================] - 0s 365us/step - loss: 0.0291 - acc: 1.0000\n",
"Epoch 41/50\n",
"56/56 [==============================] - 0s 329us/step - loss: 0.0291 - acc: 1.0000\n",
"Epoch 42/50\n",
"56/56 [==============================] - 0s 373us/step - loss: 0.0290 - acc: 1.0000\n",
"Epoch 43/50\n",
"56/56 [==============================] - 0s 389us/step - loss: 0.0290 - acc: 1.0000\n",
"Epoch 44/50\n",
"56/56 [==============================] - 0s 401us/step - loss: 0.0290 - acc: 1.0000\n",
"Epoch 45/50\n",
"56/56 [==============================] - 0s 454us/step - loss: 0.0289 - acc: 1.0000\n",
"Epoch 46/50\n",
"56/56 [==============================] - 0s 407us/step - loss: 0.0289 - acc: 1.0000\n",
"Epoch 47/50\n",
"56/56 [==============================] - 0s 369us/step - loss: 0.0288 - acc: 1.0000\n",
"Epoch 48/50\n",
"56/56 [==============================] - 0s 419us/step - loss: 0.0288 - acc: 1.0000\n",
"Epoch 49/50\n",
"56/56 [==============================] - 0s 351us/step - loss: 0.0288 - acc: 1.0000\n",
"Epoch 50/50\n",
"56/56 [==============================] - 0s 347us/step - loss: 0.0287 - acc: 1.0000\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<tensorflow.python.keras._impl.keras.callbacks.History at 0x7f76c22b86d8>"
]
},
"metadata": {
"tags": []
},
"execution_count": 194
}
]
},
{
"metadata": {
"id": "5JofAZy9NtZ0",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "cfca80ef-2ece-46ea-c72d-36eea8698835"
},
"cell_type": "code",
"source": [
"# There is no held-out split: the model is evaluated on the same data it was trained on,\n",
"# so this number is training accuracy, not an estimate of generalisation.\n",
"loss, accuracy = model.evaluate(x_train, y_train)\n",
"print('Training accuracy: %.2f' % (accuracy))"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"56/56 [==============================] - 0s 161us/step\n",
"Test accuracy: 1.00\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "-S40ynFPkcQR",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"outputId": "4f1c38d7-2296-44ff-ace3-deb2f5c383de"
},
"cell_type": "code",
"source": [
"# Classify a single sample utterance.\n",
"query = \"i want to cancel it\"\n",
"\n",
"# For the one-hot features use this instead:\n",
"# x_predict = tokenize.texts_to_matrix([query])\n",
"x_predict = [nlp(query).vector]\n",
"\n",
"# predict() works on batches, so pass an array of shape (1, n_features).\n",
"prediction = model.predict(np.array(x_predict))\n",
"print(prediction)\n",
"\n",
"# Row 0 holds the class scores of the single query, ordered like encoder.classes_.\n",
"scores = prediction[0]\n",
"text_labels = encoder.classes_\n",
"predicted_label = text_labels[np.argmax(scores)]\n",
"print(\"Predicted label: \" + predicted_label)\n",
"\n",
"print(\"Confidence: %.2f\" % scores.max())\n"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"[[2.5602958e-06 9.9089348e-01 5.0208801e-05 2.7880812e-06 9.0509029e-03\n",
" 1.5517148e-08]]\n",
"Predicted label: cancel\n",
"Confidence: 0.99\n"
],
"name": "stdout"
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment