Skip to content

Instantly share code, notes, and snippets.

@Lord-V15
Created March 26, 2021 11:47
Show Gist options
  • Save Lord-V15/f5225788a77b0b50a42461d4ce6b4e74 to your computer and use it in GitHub Desktop.
Save Lord-V15/f5225788a77b0b50a42461d4ce6b4e74 to your computer and use it in GitHub Desktop.
quora_question_similarity.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "quora_question_similarity.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "TPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/Lord-V15/f5225788a77b0b50a42461d4ce6b4e74/quora_question_similarity.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4-OUQtxZtTMv"
},
"source": [
"### Imporitng the required libraries"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ee8xJZ6GD6E7"
},
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import tensorflow_hub as hub\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import os\n",
"import pandas as pd\n",
"import re\n",
"import seaborn as sns\n",
"import keras.layers as layers\n",
"from keras.models import Model\n",
"from keras import backend as K\n",
"np.random.seed(10)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "Eir2E8FWta-4"
},
"source": [
"### Mounting the drive for the source file and reading the data in dataframe"
]
},
{
"cell_type": "code",
"metadata": {
"id": "80EbqphG_LMc",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 122
},
"outputId": "cf177ee6-deae-43d9-dcb7-969240db975c"
},
"source": [
"# mountinh the google drive to read the source \n",
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n",
"\n",
"Enter your authorization code:\n",
"··········\n",
"Mounted at /content/drive\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "KIRN2RlR_pg2",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "70f493f3-af46-4ab2-a7f2-03394bcd6977"
},
"source": [
"# using pandas to read the csv file\n",
"df = pd.read_csv(r'../questions.csv')\n",
"df.shape"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(404351, 6)"
]
},
"metadata": {
"tags": []
},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "NJjISuR5Husu",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"outputId": "672dbd9f-c0aa-42d4-8cd9-04c6181eafe1"
},
"source": [
"df.describe()\n"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>qid1</th>\n",
" <th>qid2</th>\n",
" <th>is_duplicate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>404351.000000</td>\n",
" <td>404351.000000</td>\n",
" <td>404351.000000</td>\n",
" <td>404351.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>202175.000000</td>\n",
" <td>391840.987691</td>\n",
" <td>390195.973765</td>\n",
" <td>0.369248</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>116726.223686</td>\n",
" <td>228430.857607</td>\n",
" <td>228803.645742</td>\n",
" <td>0.482602</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>2.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>101087.500000</td>\n",
" <td>193381.000000</td>\n",
" <td>191012.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>202175.000000</td>\n",
" <td>390630.000000</td>\n",
" <td>388364.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>303262.500000</td>\n",
" <td>589514.000000</td>\n",
" <td>588071.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>404350.000000</td>\n",
" <td>789800.000000</td>\n",
" <td>789801.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id qid1 qid2 is_duplicate\n",
"count 404351.000000 404351.000000 404351.000000 404351.000000\n",
"mean 202175.000000 391840.987691 390195.973765 0.369248\n",
"std 116726.223686 228430.857607 228803.645742 0.482602\n",
"min 0.000000 1.000000 2.000000 0.000000\n",
"25% 101087.500000 193381.000000 191012.000000 0.000000\n",
"50% 202175.000000 390630.000000 388364.000000 0.000000\n",
"75% 303262.500000 589514.000000 588071.000000 1.000000\n",
"max 404350.000000 789800.000000 789801.000000 1.000000"
]
},
"metadata": {
"tags": []
},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "3lD1lBpRJPxi",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 669
},
"outputId": "f4b575c6-f6c9-4a2a-a483-7cd0d5adcff0"
},
"source": [
"df.head(20)"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>qid1</th>\n",
" <th>qid2</th>\n",
" <th>question1</th>\n",
" <th>question2</th>\n",
" <th>is_duplicate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>What is the step by step guide to invest in sh...</td>\n",
" <td>What is the step by step guide to invest in sh...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>What is the story of Kohinoor (Koh-i-Noor) Dia...</td>\n",
" <td>What would happen if the Indian government sto...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>How can I increase the speed of my internet co...</td>\n",
" <td>How can Internet speed be increased by hacking...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>Why am I mentally very lonely? How can I solve...</td>\n",
" <td>Find the remainder when [math]23^{24}[/math] i...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>Which one dissolve in water quikly sugar, salt...</td>\n",
" <td>Which fish would survive in salt water?</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" <td>12</td>\n",
" <td>Astrology: I am a Capricorn Sun Cap moon and c...</td>\n",
" <td>I'm a triple Capricorn (Sun, Moon and ascendan...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>6</td>\n",
" <td>13</td>\n",
" <td>14</td>\n",
" <td>Should I buy tiago?</td>\n",
" <td>What keeps childern active and far from phone ...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>7</td>\n",
" <td>15</td>\n",
" <td>16</td>\n",
" <td>How can I be a good geologist?</td>\n",
" <td>What should I do to be a great geologist?</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>8</td>\n",
" <td>17</td>\n",
" <td>18</td>\n",
" <td>When do you use シ instead of し?</td>\n",
" <td>When do you use \"&amp;\" instead of \"and\"?</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>9</td>\n",
" <td>19</td>\n",
" <td>20</td>\n",
" <td>Motorola (company): Can I hack my Charter Moto...</td>\n",
" <td>How do I hack Motorola DCX3400 for free internet?</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>10</td>\n",
" <td>21</td>\n",
" <td>22</td>\n",
" <td>Method to find separation of slits using fresn...</td>\n",
" <td>What are some of the things technicians can te...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>11</td>\n",
" <td>23</td>\n",
" <td>24</td>\n",
" <td>How do I read and find my YouTube comments?</td>\n",
" <td>How can I see all my Youtube comments?</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>12</td>\n",
" <td>25</td>\n",
" <td>26</td>\n",
" <td>What can make Physics easy to learn?</td>\n",
" <td>How can you make physics easy to learn?</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>13</td>\n",
" <td>27</td>\n",
" <td>28</td>\n",
" <td>What was your first sexual experience like?</td>\n",
" <td>What was your first sexual experience?</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>14</td>\n",
" <td>29</td>\n",
" <td>30</td>\n",
" <td>What are the laws to change your status from a...</td>\n",
" <td>What are the laws to change your status from a...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>15</td>\n",
" <td>31</td>\n",
" <td>32</td>\n",
" <td>What would a Trump presidency mean for current...</td>\n",
" <td>How will a Trump presidency affect the student...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>16</td>\n",
" <td>33</td>\n",
" <td>34</td>\n",
" <td>What does manipulation mean?</td>\n",
" <td>What does manipulation means?</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>17</td>\n",
" <td>35</td>\n",
" <td>36</td>\n",
" <td>Why do girls want to be friends with the guy t...</td>\n",
" <td>How do guys feel after rejecting a girl?</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>18</td>\n",
" <td>37</td>\n",
" <td>38</td>\n",
" <td>Why are so many Quora users posting questions ...</td>\n",
" <td>Why do people ask Quora questions which can be...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>19</td>\n",
" <td>39</td>\n",
" <td>40</td>\n",
" <td>Which is the best digital marketing institutio...</td>\n",
" <td>Which is the best digital marketing institute ...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id qid1 qid2 question1 \\\n",
"0 0 1 2 What is the step by step guide to invest in sh... \n",
"1 1 3 4 What is the story of Kohinoor (Koh-i-Noor) Dia... \n",
"2 2 5 6 How can I increase the speed of my internet co... \n",
"3 3 7 8 Why am I mentally very lonely? How can I solve... \n",
"4 4 9 10 Which one dissolve in water quikly sugar, salt... \n",
"5 5 11 12 Astrology: I am a Capricorn Sun Cap moon and c... \n",
"6 6 13 14 Should I buy tiago? \n",
"7 7 15 16 How can I be a good geologist? \n",
"8 8 17 18 When do you use シ instead of し? \n",
"9 9 19 20 Motorola (company): Can I hack my Charter Moto... \n",
"10 10 21 22 Method to find separation of slits using fresn... \n",
"11 11 23 24 How do I read and find my YouTube comments? \n",
"12 12 25 26 What can make Physics easy to learn? \n",
"13 13 27 28 What was your first sexual experience like? \n",
"14 14 29 30 What are the laws to change your status from a... \n",
"15 15 31 32 What would a Trump presidency mean for current... \n",
"16 16 33 34 What does manipulation mean? \n",
"17 17 35 36 Why do girls want to be friends with the guy t... \n",
"18 18 37 38 Why are so many Quora users posting questions ... \n",
"19 19 39 40 Which is the best digital marketing institutio... \n",
"\n",
" question2 is_duplicate \n",
"0 What is the step by step guide to invest in sh... 0 \n",
"1 What would happen if the Indian government sto... 0 \n",
"2 How can Internet speed be increased by hacking... 0 \n",
"3 Find the remainder when [math]23^{24}[/math] i... 0 \n",
"4 Which fish would survive in salt water? 0 \n",
"5 I'm a triple Capricorn (Sun, Moon and ascendan... 1 \n",
"6 What keeps childern active and far from phone ... 0 \n",
"7 What should I do to be a great geologist? 1 \n",
"8 When do you use \"&\" instead of \"and\"? 0 \n",
"9 How do I hack Motorola DCX3400 for free internet? 0 \n",
"10 What are some of the things technicians can te... 0 \n",
"11 How can I see all my Youtube comments? 1 \n",
"12 How can you make physics easy to learn? 1 \n",
"13 What was your first sexual experience? 1 \n",
"14 What are the laws to change your status from a... 0 \n",
"15 How will a Trump presidency affect the student... 1 \n",
"16 What does manipulation means? 1 \n",
"17 How do guys feel after rejecting a girl? 0 \n",
"18 Why do people ask Quora questions which can be... 1 \n",
"19 Which is the best digital marketing institute ... 0 "
]
},
"metadata": {
"tags": []
},
"execution_count": 6
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-MLmEtSRtyiN"
},
"source": [
"## Exploratory Data Analysis"
]
},
{
"cell_type": "code",
"metadata": {
"id": "MBiTaRIJt1Nv",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 434
},
"outputId": "b592cd1b-4282-411e-d2ec-f04c36d8dd45"
},
"source": [
"print('Total number of question pairs for training: {}'.format(len(df)))\n",
"# calculating the pair of percentage of duplicate values\n",
"print('Duplicate pairs: {}%'.format(round(df['is_duplicate'].mean()*100, 2)))\n",
"qids = pd.Series(df['qid1'].tolist() + df['qid2'].tolist())\n",
"\n",
"#calculating the total number of question in dataset\n",
"print('Total number of questions in the training data: {}'.format(len(\n",
" np.unique(qids))))\n",
"\n",
"# calculating the unique question id's\n",
"print('Number of questions that appear multiple times: {}'.format(np.sum(qids.value_counts() > 1)))\n",
"\n",
"\n",
"# Visualizing the trend of questions\n",
"plt.figure(figsize=(12, 5))\n",
"plt.hist(qids.value_counts(), bins=50)\n",
"plt.yscale('log', nonposy='clip')\n",
"plt.title('Log-Histogram of question appearance counts')\n",
"plt.xlabel('Number of occurences of question')\n",
"plt.ylabel('Number of questions')\n",
"print()"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Total number of question pairs for training: 404351\n",
"Duplicate pairs: 36.92%\n",
"Total number of questions in the training data: 789801\n",
"Number of questions that appear multiple times: 13698\n",
"\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtEAAAFMCAYAAAAN2eAHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl4FFX6/v+7SRMQwm4HZVGcfNhM\nRDZRCAjEsARcEaVhQJAZdUQURJYAQlTWILgQWTQj6sAIEXBjxIk4CIMMO8oSQRQBIYIkAWJCAiHJ\n+f3Bz/oSQ2gK0klDv1/XxXWlqrtPPdVPg7cnp6scxhgjAAAAABetTGkXAAAAAFxpCNEAAACATYRo\nAAAAwCZCNAAAAGATIRoAAACwiRANAAAA2ESIBqCGDRvqyJEjXhl7w4YN6tSpU6H90dHRmj17tiRp\n5MiRWrly5QXHWb58uTIzM71So7cdPHhQnTp10n333VdqNaxZs0a//PKLJGnGjBlauHBhqdWCy/fB\nBx+UdgmA3yNEAyh106ZNU0RExAWfM3PmzCs2RG/ZskUul0uffPJJqdXw7rvvWiH6ueeeU+/evUut\nFlyevLw8TZs2rbTLAPweIRpAkU6fPq3x48erS5cuioqK0tSpU5WXlyfp7Mxm+/btFRUVpYSEBDVv\n3lyHDh26pOP069fPCpivvvqqunTpoi5duuiRRx7Rr7/+qtGjR2vfvn3q16+fNm/erBMnTmjIkCHq\n0qWLunXrprfeessa68MPP1R4eLjuvfdeffjhh2rYsKG1f/Dgwerfv78VQGbNmqUuXbooMjJSTzzx\nhH777TdJUlxcnGJiYvTEE0+obdu2GjFihL766iv16NFDbdu21VdffXXe8/j888919913q2vXrnrk\nkUf0888/65tvvtH06dP13Xff6d577y30mu3bt6tbt27q0qWLpk6dqu7du2vDhg2FZvDP3c7JydHE\niRPVpUsXRUREaO7cudbzFixYoKioKHXt2lU9e/bUDz/8oNdee03r16/XiBEjtHz58gK/Bdi9e7fc\nbre6du2q++67T2vWrLGO16tXL82YMUNRUVGKiIjQxo0bz3veixcvVlRUlDp37qw///nPSk5Ott7H\n6OhoPfHEE+rYsaPcbrfS0tIkSREREYqPj1ePHj10xx136LXXXrPG+/LLL3XPPfforrvu0sCBA3Xs\n2DFJUnZ2toYOHWqdd2xsbIHP0KuvvqqoqCht3bpVqamp+stf/qKuXbsqIiJC77zzjvXciIgILVq0\nSD179lTbtm01depU67GPP/7Y+vyNGDFCOTk5F6zpj4p6/fk+G1LB38j8cbuoOh999FFlZGSoa9eu\nOnjwoDV2VFSU7rnnHm3YsOG8tQEoZgaA32vQoIE5fPhwof1vvvmmeeyxx8yZM2dMdna2efDBB83H\nH39scnNzTZs2bcyqVauMMcZMnTrVNGrUyBw8eLDQGOvXrzeRkZGF9o8aNcrMmjXLGGNM3759zccf\nf2z27NljOnfubHJycowxxvzjH/8wH330UaEax40bZ8aNG2eMMeb48eOmQ4cOZtOmTeb48eOmSZMm\n5vvvvzd5eXnm2WefNQ0aNDDGGLN06VLTtGlTs2/fPmOMMTt27DCtW7c2GRkZJi8vzwwYMMCqZ+bM\nmebOO+80qamp5tixYyYsLMy88MILxhhj5s+fb3r37l3ofJKTk02LFi3M/v37jTHGvP3226Z///7W\nsX//+Y8eeOABs3DhQmOMMf/6179M48aNzfr16wu9b+duv/HGG6Z///7m9OnT5uTJk+b+++83K1eu\nNBkZGaZly5YmIyPDGGPM8uXLzVtvvWWMMaZjx45m06ZNBd77vLw8ExUVZZYtW2aMMWb79u3mtttu\nMxkZGWb9+vUmLCzMrFixwhhjTHx8vBkwYECh
+lNTU01YWJjVm+joaDNmzBjrfWzWrJn5+eefjTHG\nDB8+3EyaNMmqZ9CgQSY3N9ekpqaa2267zezatcv8/PPPplmzZub77783xhgzd+5c8/TTT1vv6V//\n+leTn59vTpw4YVq1amWdU9++fc3AgQNNXl6eMcaYl156yYwfP94YY8zPP/9sQkNDzS+//GIde9iw\nYSY3N9ccOXLEhIaGmsOHD5uDBw+aO+64wxw5csTk5+ebp556ysTHx1+wpnMV9foLfTbO/Xvwx+0L\n1dm4cWPrNbfffrs5dOiQMcaYTZs2mcmTJxeqDUDxYyYaQJFWrVqlhx9+WE6nU+XLl9c999yjtWvX\nav/+/crJyVH79u0lnZ0FzM/PL3Kcw4cPq2vXrgX+rFixotDzKleurGPHjmnZsmVKT09Xv379dP/9\n9xd63urVq9WnTx9JUtWqVdWpUyetXbtW27ZtU7169dSgQQOVKVOm0JKFevXqqV69epKksLAwrVq1\nSkFBQSpTpoyaNWumgwcPWs9t1qyZatSooWrVqsnlcunOO++UJDVo0EBHjx4tVNPatWt1++2368Yb\nb5QkPfTQQ9qwYYNyc3OLfF9Onz6tpKQkdevWTZLUrVs3lStXrsjn/+6rr75Snz59FBgYqAoVKui+\n++7TF198oXLlysnhcGjJkiVKTU1VVFSUHnvssSLHOXTokFJTU9W9e3dJ0i233KJatWppx44dkqSK\nFSsqMjJSkhQaGmotBzlXjRo1tGXLFl133XWSpJYtWxZ4H2+//XbVrVtXktS5c2d988031mP333+/\nAgICVKNGDbVo0UJbt27Vf//7X7Vq1UoNGjSQJLndbq1cuVJ5eXkaOHCgZs+eLYfDoSpVqqh+/foF\nfvvRvn17lSlz9j9rzz//vMaNGydJqlu3rlwuV4Hn3nPPPQoICFDNmjVVo0YNHT58WGvXrlWzZs1U\ns2ZNORwOzZgxQwMGDLhgTecq6vWX8tm4UJ3n68GiRYuUnJysli1bavTo0R7HBXD5nKVdAADfdezY\nMVWpUsXarlKlitLS0pSenq7KlStb+4ODg62fZ8yYYQXk35dNXH/99fr3v/9dYOzo6OhCx6tZs6bi\n4uI0b948TZgwQbfddptefPFFXX/99YXqOvf4lStX1tGjR/Xbb78VqLdmzZoFXnfuY9nZ2ZoyZYr1\nq+/09HR16NDBerxixYrWzwEBAapQoYIkqUyZMuf9H4bjx48XqKlSpUoyxuj48eOFnvu7EydOWM+V\nJIfDoWrVqhX5/N9lZGRoypQpeuWVVySdXd7RpEkTlS1bVu+++67mzp2ruLg4NWzYUDExMdaSlj86\nduyYKlWqJIfDYe37/X9krr32WquuC513Xl6eZs6caYXKkydP6qabbrIer1q1aoGxf18yI6nQZ+v3\nxzZv3qyuXbtajwUFBenEiRPKyMjQ1KlT9dNPP6lMmTI6cuSIevTocd7xduzYoRkzZujw4cMqU6aM\nUlJSCtQfFBRk/RwQEKC8vLxCPfz9f2gyMjKKrKlGjRrWvqJefymfjQvV+Udz5szRnDlz1KNHD11/\n/fUaM2aMWrVq5XFsAJeHEA2gSNdee60V9KSzoe/aa69VUFCQsrKyrP2pqanWz88995yee+45a9vu\n+sw77rhDd9xxh7KyshQbG6vp06drxowZ562rVq1aF6zrfDPGv3vvvfe0f/9+ffjhh6pYsaJeffVV\n/frrr7ZqPVeNGjUKzLKmp6erTJkyFwzFvwerzMxMVapUSfn5+db7/cfAdG74DA4O1sCBA9WxY8dC\nY958882aOXOmcnJy9Pe//10xMTFatGhRkTWnp6fLGGMF6T8GQ0+WL1+ulStXasGCBapevbo++OAD\nLVu2zHr83KCYnp5eIOie+9iJEydUpUoVlStXTm3atNHMmTMLHWvEiBEKDQ3VrFmzFBAQILfbXWRd\nI0aMUP/+
/dW7d285HA61a9fO47lUq1atQA8zMzN16tQpBQcHF1nTxbz+Qp+NP/7PSXp6usc6/+iG\nG27QlClTlJ+fr48//ljPPfectbYdgPewnANAkTp06KAlS5YoLy9PWVlZ+uSTT9S+fXvVq1dPubm5\nVkBeuHBhgdnMS/X111/rxRdfVH5+vipUqKBGjRpZ4zqdTitIdujQQQkJCZLOzqauWLFCHTp0UGho\nqL7//nsdOHBA+fn5WrJkSZHHSktL05/+9CdVrFhRycnJWr16dYEAbld4eLg2b95sLWVYtGiRwsPD\n5XQWPVdxzTXXqGHDhvriiy8kSZ9++qlOnz4tSXK5XEpJSVFaWpry8vIKBNO77rpLixcvVl5enowx\nmj17tv773//q+++/1zPPPKOcnBwFBgYqLCyswPuXkZFR4Ph16tTRddddp+XLl0uS9YW8Jk2aXPR5\np6WlqXbt2qpevbqOHz+uzz//XCdPnrQe37Jli7UEITExUS1atLAeW758ufLz85WamqqtW7eqZcuW\natu2bYH3cfv27Zo4caJ1rMaNGysgIEBr167VgQMHiuxZWlqadf4fffSRsrOzPfa3ffv22rp1qw4d\nOiRjjGJiYrRkyZIL1nQxr7/QZ8Plcmn37t2Szl4KcevWrR7f87Jlyyo/P1+ZmZk6duyYHn30UWVm\nZqpMmTK69dZbi+XvIgDPmIkGIOnsuuaAgABre+LEierXr58OHjyo7t27y+FwqGvXroqKipLD4dAL\nL7yg0aNHq1KlSnr00UdVpkyZy/6P92233abPPvtMXbp0UWBgoKpXr67JkydLkrp27Sq3262JEydq\n6NCheuGFF9S1a1eVKVNGjz/+uBX8hg0bpkceeUTXXnut3G63Pvroo/Mey+1265lnnlGXLl3UsGFD\nRUdH6+mnn9a77757SbVfd911mjhxogYNGqQzZ86oTp06mjBhgsfXjR8/Xs8//7zeeustRUREWLPr\nN954ox588EHdf//9qlWrlu677z7t2rVLktSnTx8dOnRI3bt3lzFGYWFh6t+/vypUqKA6dero7rvv\nVtmyZVWxYkWNHz9ektSlSxcNGzZMzzzzjHVsh8OhV155RTExMXrjjTd0zTXX6PXXX7eWrlyMu+++\nW5999pk6deqkunXraujQoXryySc1depUVaxYUW3atNGLL76oXbt2qVatWho7dqz12vr166tnz55K\nTk5Wv379VL9+fUnShAkT9NRTT+nMmTOqWLGixowZI0l68sknNWXKFM2ePVt33XWXBg8erJkzZ6px\n48aF6hoyZIieeuopVa1aVW63W7169dK4ceP0/vvvF3ku1113nV566SX1799fAQEBuuWWW/Too4+q\nXLlyRdZ0sa8v6rPx8MMPa/DgwercubNuvvlmdenSxeN77nK51KJFC3Xs2FFvvvmm2rVrpwcffFAB\nAQEqW7asJk2a5HEMAJfPYYwxpV0EgCtbVlaWmjVrps2bNxdYR1sazl2a8MMPP6hPnz7atGlTqdZk\nR6dOnTRx4kTdfvvtpV3KZYuLi9ORI0fOG+oiIiI0bdo0tWzZshQqA4DLx3IOAJfkwQcftJYBLF++\nXCEhIaUeoHNzc9WuXTtt27bNqqtp06alWhMA4OrEcg4Al2T06NF66aWX9Prrr6tixYoFblhRWpxO\np2JiYjRq1CgZY+RyufjVNgDAK1jOAQAAANjEcg4AAADAJkI0AAAAYNMVuSY6JSXD85MuQrVqFXT8\n+KVfFxZXFvrtf+i5f6Hf/oV++5fS7LfLdf4vzfv1TLTTGeD5Sbhq0G//Q8/9C/32L/Tbv/hiv/06\nRAMAAACXghANAAAA2OQTa6JTUlL05ptvKjc3V263W40aNSrtkgAAAIAieXUmes+ePYqMjNSCBQus\nfZMnT1avXr3kdru1fft2SdKSJUtUu3ZtlS9fXi6Xy5slAQAAAJfNayE6Ky
tLEyZMUOvWra19Gzdu\n1IEDB5SQkKBJkyZZdxI7fPiwOnfurF69eukf//iHt0oCAAAAioXXQnRgYKDi4+MVHBxs7Vu3bp0i\nIyMlSSEhIUpPT1dmZqZq1KghY4wqVKig7Oxsb5UEAAAAFAuvrYl2Op1yOgsOn5qaqtDQUGu7evXq\nSklJUc+ePTVz5kzl5eXpiSee8Dh2tWoViu1SJ0Vd+w9XJ/rtf+i5f6Hf/oV++xdf63epfrHQGCNJ\nql27tmJjYy/6dcV1sW2Xq1Kx3bgFvo9++x967l/ot3+h3/6lNPvtEzdbCQ4OVmpqqrV99OhRvkgI\nAACAK06Jhujw8HAlJiZKkpKSkhQcHKygoKCSLAEAAAC4bF5bzrFz507FxsYqOTlZTqdTiYmJiouL\nU2hoqNxutxwOh2JiYrx1eK8ZOHWlrefPi47wUiUAAAAoLV4L0WFhYZo/f36h/cOHD/fWIQEAAIAS\nwW2/AQAAAJsI0QAAAIBNhGgAAADAJkI0AAAAYBMhGgAAALCJEA0AAADYRIgGAAAAbCJEAwAAADYR\nogEAAACbCNEAAACATYRoAAAAwCZCNAAAAGATIRoAAACwiRANAAAA2ESIBgAAAGwiRAMAAAA2EaIB\nAAAAmwjRAAAAgE2EaAAAAMAmQjQAAABgEyEaAAAAsIkQDQAAANhEiAYAAABsIkQDAAAANhGiAQAA\nAJsI0QAAAIBNhGgAAADAJkI0AAAAYBMhGgAAALCJEA0AAADYRIgGAAAAbCJEAwAAADY5S7sASYqL\ni9ORI0dUuXJl3XvvvWrcuHFplwQAAAAUyasz0Xv27FFkZKQWLFhg7Zs8ebJ69eolt9ut7du3W/vL\nly+vM2fOKDg42JslAQAAAJfNazPRWVlZmjBhglq3bm3t27hxow4cOKCEhATt3btXY8aMUUJCgh5+\n+GFVrVpVKSkpeu+99zRs2DBvlQUAAABcNq/NRAcGBio+Pr7AzPK6desUGRkpSQoJCVF6eroyMzP1\n008/yel0qnLlysrJyfFWSQAAAECx8NpMtNPplNNZcPjU1FSFhoZa29WrV1dKSopOnTql6OhoOZ1O\nPf744x7HrlatgpzOgGKp0+WqVCzjlNb4sId++B967l/ot3+h3/7F1/pdql8sNMZIkjp27KiOHTte\n9OuOH88qluO7XJWUkpJRLGMVxdvj4+KVRL/hW+i5f6Hf/oV++5fS7HdR4b1EL3EXHBys1NRUa/vo\n0aNyuVwlWQIAAABw2Uo0RIeHhysxMVGSlJSUpODgYAUFBZVkCQAAAMBl89pyjp07dyo2NlbJycly\nOp1KTExUXFycQkND5Xa75XA4FBMT463DAwAAAF7jtRAdFham+fPnF9o/fPhwbx0SAAAAKBHc9hsA\nAACwiRANAAAA2ESIBgAAAGwiRAMAAAA2EaIBAAAAmwjRAAAAgE2EaAAAAMAmQjQAAABgEyEaAAAA\nsIkQDQAAANhEiAYAAABsIkQDAAAANhGiAQAAAJsI0QAAAIBNhGgAAADAJkI0AAAAYBMhGgAAALCJ\nEA0AAADYRIgGAAAAbCJEAwAAADYRogEAAACbCNEAAACATYRoAAAAwCZCNAAAAGATIRoAAACwiRAN\nAAAA2ESIBgAAAGwiRAMAAAA2EaIBAAAAmwjRAAAAgE0eQ/SZM2d05MgRSdLu3bv18ccfKzs72+uF\nAQAAAL7KY4iOjo7Wt99+q19//VVPP/209uzZo+jo6JKoDQAAAPBJHkP0r7/+qq5du2r58uXq06eP\nRo4cqfT09GIvJCUlRW3btlVubm6xjw0AAAAUJ48hOicnR8YYrVixQh06dJAkZWVlXdTge/bsUWRk\npBYsWGDtmzx5snr16iW3263t27db+9955x3ddtttNssHAAAASp7T0xNatWqlFi1aqF27drrpppv0\n7rvv6qabbvI4cFZWliZMmKDWrVtb+z
Zu3KgDBw4oISFBe/fu1ZgxY5SQkKBPPvlEnTt31qJFiy7v\nbHzQwKkrbT1/XnSElyoBAABAcfE4Ez18+HCtWrVKr7/+uiQpMjJSkyZN8jhwYGCg4uPjFRwcbO1b\nt26dIiMjJUkhISFKT09XZmamtm3bpjVr1mjXrl367LPPLvVcAAAAgBLhcSb6hx9+0OLFi5Weni5j\njLV/2rRpFx7Y6ZTTWXD41NRUhYaGWtvVq1dXSkqKxo8fL0lKTk5W9+7dPRZdrVoFOZ0BHp93MVyu\nSsUyTnHxtXquNry//oee+xf67V/ot3/xtX57DNFDhw5VVFSUGjduXOwHPzeUS9LUqVMv6nXHj1/c\nmmxPXK5KSknJKJaxiouv1XM18cV+w7vouX+h3/6FfvuX0ux3UeHdY4i+9tprNXjw4GIpIjg4WKmp\nqdb20aNH5XK5imVsAAAAoKR4XBN955136uuvv1ZOTo7y8/OtP5ciPDxciYmJkqSkpCQFBwcrKCjo\nksYCAAAASovHmeg5c+YoMzOzwD6Hw6Fdu3Zd8HU7d+5UbGyskpOT5XQ6lZiYqLi4OIWGhsrtdsvh\ncCgmJubyqgcAAABKgccQvXnz5ksaOCwsTPPnzy+0f/jw4Zc0HgAAAOArPIbokydP6t1339WOHTvk\ncDjUrFkzPfLIIypfvnxJ1AcAAAD4HI9roseNG6fMzEy53W49/PDDSklJ0fPPP18StQEAAAA+yeNM\ndGpqql555RVru2PHjurXr59XiwIAAAB8mceZ6OzsbGVnZ1vbWVlZOn36tFeLAgAAAHyZx5noXr16\nKSoqSmFhYTLG6LvvvtOQIUNKojYAAADAJ3kM0T179lR4eLiSkpLkcDg0fvx41axZsyRqAwAAAHxS\nkSF69erVat++vZYsWVJg/5o1aySdDdcAAACAPyoyRH///fdq3769tmzZct7HCdEAAADwV0WG6Mcf\nf1yS1LZtW3Xv3r3AYwsXLvRuVQAAAIAPKzJE79q1Szt37tS8efMKXJ0jNzdXs2bNUu/evUukQAAA\nAMDXFBmiAwMDlZaWpoyMjAJLOhwOh0aOHFkixQEAAAC+qMgQHRISopCQEN1xxx1q2rSptT8/P19l\nyni8vDQAAABw1fKYhn/66Sf985//VF5ennr37q277rpL77//fknUBgAAAPgkjyE6ISFBDz30kFas\nWKH69evrP//5jz7//POSqA0AAADwSR5DdLly5RQYGKjVq1crKiqKpRwAAADwexeViF988UVt3bpV\nrVq10jfffKOcnBxv1wUAAAD4LI8hevr06brxxhs1d+5cBQQEKDk5WS+++GJJ1AYAAAD4JI8hOjg4\nWDfeeKPWrl0rSWrSpIkaNmzo9cIAAAAAX+UxRL/88staunSpPvzwQ0nSsmXLNHHiRK8XBgAAAPgq\njyF606ZNeuONN1SxYkVJ0lNPPaWkpCSvFwYAAAD4qou6Ood09k6FkpSXl6e8vDzvVgUAAAD4sCLv\nWPi75s2ba/To0Tp69KjeeecdffHFF2rVqlVJ1AYAAAD4JI8h+tlnn9W///1vlS9fXkeOHNGjjz6q\nzp07l0RtAAAAgE/yGKIPHjyo0NBQhYaGFthXt25drxYGAAAA+CqPIbp///7WeuicnBwdO3ZM9evX\n18cff+z14gAAAABf5DFEr1y5ssD2Dz/8oCVLlnitIAAAAMDXXdRtv89Vv359LnEHAAAAv+ZxJvr1\n118vsH3kyBH99ttvXivI3w2cutLzk84xLzrCS5UAAACgKB5nogMCAgr8adiwoeLj40uiNgAAAMAn\neZyJHjRo0Hn35+fnS5LKlLG9IgQAAAC4onkM0U2aNDnvHQqNMXI4HNq1a5dXCgMAAAB8lccQ/dRT\nT+n//u//FB4eLofDoa+++kr79+8vcoYaAAAAuNp5XIuxfv16derUSRUqVNA111yjbt26acOGDcVa\nxJ
YtWzRixAgNHTpUO3bsKNaxAQAAgOLmMUSfOHFCq1ev1smTJ3Xy5EmtXr1ax44du6jB9+zZo8jI\nSC1YsMDaN3nyZPXq1Utut1vbt2+XJAUFBWnixIkaOHCgNm7ceImnAgAAAJQMj8s5JkyYoKlTp+rZ\nZ5+VJDVo0EAxMTEeB87KytKECRPUunVra9/GjRt14MABJSQkaO/evRozZowSEhLUsGFDrV69Wm+/\n/bYmTpx4GacDAAAAeN9FfbHw/ffftz1wYGCg4uPjC1wOb926dYqMjJQkhYSEKD09XZmZmdq7d6/u\nvPNO3XLLLXrjjTc0fvx428cDAAAASorHEH3JAzudcjoLDp+amqrQ0FBru3r16kpJSVF6errGjx+v\nrKws3XvvvR7HrlatgpzOgGKp0+WqVCzjlJYrvf6Sxvvlf+i5f6Hf/oV++xdf67fXQvTFMMZIku68\n807deeedF/2648eziuX4LlclpaRkFMtYpeVKr78kXQ39hj303L/Qb/9Cv/1Lafa7qPBe5BcLly5d\nKklavHhxsRURHBys1NRUa/vo0aNyuVzFNj4AAABQEoqciZ4zZ47OnDmj9957Tw6Ho9DjPXv2tH2w\n8PBwxcXFye12KykpScHBwQoKCrI9DgAAAFCaigzRI0eO1OrVq5WRkaEtW7YUetxTiN65c6diY2OV\nnJwsp9OpxMRExcXFKTQ0VG63Ww6H46Ku8gEAAAD4miJDdOfOndW5c2clJiaqS5cutgcOCwvT/Pnz\nC+0fPny47bEAAAAAX+Lxi4VNmzbVmDFjtGPHDjkcDjVt2lRDhw5V9erVS6I+AAAAwOd4vGNhTEyM\nQkND9corr2j69On605/+pDFjxpREbQAAAIBP8jgTnZ2drT//+c/WdoMGDbRy5UqvFgUAAAD4Mo8z\n0dnZ2Tp69Ki1feTIEeXk5Hi1KAAAAMCXeZyJHjRokHr06CGXyyVjjI4dO6ZJkyaVRG0AAACAT/IY\nojt06KAvv/xS+/fvlyTddNNNKleunLfrAgAAAHzWRd32u3z58mrUqJG3awEAAACuCB7XRAMAAAAo\nyONMtDHmvLf9hm8YONXelVLmRUd4qRIAAAD/4XEm+pFHHimJOgAAAIArhseZ6MaNG+v1119Xs2bN\nVLZsWWt/69atvVoYAAAA4Ks8huhdu3ZJkjZv3mztczgchGgAAAD4LY8hev78+ZJYGw0AAAD8zuOa\n6N27d6tHjx6KioqSJM2aNUvbtm3zemEAAACAr/IYol966SVNnjxZLpdLktStWzdNmTLF64UBAAAA\nvspjiHY6nQVutHLTTTfJ6byoe7QAAAAAV6WLCtEHDx601kOvXr1axhivFwYAAAD4Ko9TyqNGjdKg\nQYO0b98+NW/eXHXq1FFsbGyPHYLIAAAYoElEQVRJ1AYAAAD4JI8humHDhlq2bJmOHTumwMBABQUF\nlURdAAAAgM/yGKJ//PFHxcXF6ccff5TD4VCDBg00ePBg/elPfyqJ+gAAAACf4zFEjxw5Un369NEz\nzzwjSdqyZYtGjBihpUuXer04AAAAwBd5DNEVK1ZUz549re2QkBAlJiZ6tSgAAADAlxUZovPz8yVJ\nrVu31hdffKE2bdrI4XBo3bp1uu2220qsQBSvgVNX2nr+vOgIL1UCAABw5SoyRN98881yOBznvZyd\n0+nU3/72N68WBgAAAPiqIkP07t27S7IOAAAA4IrhcU30r7/+qsTERGVkZBSYlR48eLBXCwMAAAB8\nlcc7Fj722GPatWuXzpw5o9zcXOsPAAAA4K88zkRXrVpVU6ZMKYlaAAAAgCuCxxDdqVMnffrpp2rW\nrJkCAgKs/bVq1fJqYQAAAICv8hiiv//+ey1btkxVq1a19jkcDq1atcqbdQEAAAA+y2OI3rZtmzZt\n2qTAwMCSqAcAAADweR6/WBgWFqbTp0+XRC0AAADAFeGiLnEXERGh
kJCQAmui//nPfxZbEd98840W\nL16svLw89evXT2FhYcU2NgAAAFDcPIboy7kz4Z49ezRo0CANGDBAffv2lSRNnjxZ27Ztk8Ph0Jgx\nY9SkSRNdc801iomJ0U8//aQNGzYQon2I3duES9wqHAAAXP08LufIy8s77x9PsrKyNGHCBLVu3dra\nt3HjRh04cEAJCQmaNGmSJk2aJElq1KiRzpw5o/fff1/333//ZZwOAAAA4H0eZ6Jnz55t/XzmzBn9\n+OOPat68eYFwfD6BgYGKj49XfHy8tW/dunWKjIyUJIWEhCg9PV2ZmZkyxujll1/WsGHDClwFBAAA\nAPBFHkP0/PnzC2ynpaVpxowZngd2OuV0Fhw+NTVVoaGh1nb16tWVkpKijz76SCdPntTs2bPVsmVL\ndenS5YJjV6tWQU5nwAWfc7FcrkrFMg7+H19+T325NngHPfcv9Nu/0G//4mv99hii/6hGjRr66aef\niuXgxhhJ0rBhw2y97vjxrGI5vstVSSkpGcUyFv4fX31P6bf/oef+hX77F/rtX0qz30WFd48hesSI\nEXI4HNb24cOHVaaMx6XU5xUcHKzU1FRr++jRo3K5XJc0FgAAAFBaPIboNm3aWD87HA4FBQUpPDz8\nkg4WHh6uuLg4ud1uJSUlKTg4WEFBQZc0FgAAAFBaPIboBx544JIG3rlzp2JjY5WcnCyn06nExETF\nxcUpNDRUbrdbDodDMTExlzQ2AAAAUJqKDNEREREFlnEYY+RwOJSTk6PU1FTt2rXrggOHhYUV+lKi\nJA0fPvwyygUAAABKX5EheuXKwjfZ+PLLLzVjxgw9+OCDXi0KAAAA8GUXdXWO/fv3a+LEiSpbtqze\neust1a1b19t1AQAAAD7rgiE6KytLs2bN0urVqzVixAi1b9++pOoCAAAAfFaR16r717/+pR49eqhK\nlSr66KOPCNAAAADA/6/Imejhw4erXr16WrNmjb7++mtr/+9fMPzHP/5RIgXiyjNwauH19BcyLzrC\nS5UAAAB4R5Eh+j//+U9J1gEAAABcMYoM0bVr1y7JOgAAAIArxqXdvxsAAADwY4RoAAAAwCZCNAAA\nAGATIRoAAACwiRANAAAA2ESIBgAAAGwiRAMAAAA2FXmdaMBXcUdEAABQ2gjRKHV2QzEAAEBpYzkH\nAAAAYBMhGgAAALCJEA0AAADYRIgGAAAAbCJEAwAAADYRogEAAACbCNEAAACATYRoAAAAwCZCNAAA\nAGATIRoAAACwiRANAAAA2ESIBgAAAGwiRAMAAAA2OUu7AMDXDJy60tbz50VHeKkSAADgq5iJBgAA\nAGwiRAMAAAA2+USIPnr0qIYMGaLFixeXdikAAACAR14N0Xv27FFkZKQWLFhg7Zs8ebJ69eolt9ut\n7du3ny2iTBn16tXLm6UAAAAAxcZrXyzMysrShAkT1Lp1a2vfxo0bdeDAASUkJGjv3r0aM2aMEhIS\ndO2112rv3r3eKgV+zu4XBQEAADzxWogODAxUfHy84uPjrX3r1q1TZGSkJCkkJETp6enKzMxUUFCQ\nrbGrVasgpzOgWOp0uSoVyzjwX3yGfBv98S/027/Qb//ia/32Woh2Op1yOgsOn5qaqtDQUGu7evXq\nSklJ0Y4dO7Rw4UJlZGSoatWq6tSp0wXHPn48q1hqdLkqKSUlo1jGgv/iM+S7+DvuX+i3f6Hf/qU0\n+11UeC/V60QbYyRJrVu3LrDsA7iSXMpyEa4tDQDAla1Er84RHBys1NRUa/vo0aNyuVwlWQIAAABw\n2Uo0RIeHhysxMVGSlJSUpODgYNvroQEAAIDS5rXlHDt37lRsbKySk5PldDqVmJiouLg4hYaGyu12\ny+FwKCYmxluHBwAAALzGayE6LCxM8+fPL7R/+PDh3jokAAAAUCJ84o6FAAAAwJWEEA0AAADYRIgG\nAAAAbCrV60QD/srutaW5rjQA
AL6FmWgAAADAJkI0AAAAYBPLOYArAMs/AADwLcxEAwAAADYRogEA\nAACbCNEAAACATayJBmB7zfWlYJ02AOBqwkw0AAAAYBMhGgAAALCJEA0AAADYRIgGAAAAbOKLhcBV\nqCS+KAgAgD9jJhoAAACwiRANAAAA2ESIBgAAAGwiRAMAAAA2EaIBAAAAmwjRAAAAgE2EaAAAAMAm\nrhMNwC/ZvZb2vOgIL1VScvzxnAHAW5iJBgAAAGwiRAMAAAA2EaIBAAAAmwjRAAAAgE2EaAAAAMAm\nQjQAAABgEyEaAAAAsIkQDQAAANjkEzdb2b59uxYtWiRjjAYPHqzatWuXdkkAAABAkbw6E71nzx5F\nRkZqwYIF1r7JkyerV69ecrvd2r59uyRp4cKFeuGFFzRo0CAtXrzYmyUBAAAAl81rM9FZWVmaMGGC\nWrdube3buHGjDhw4oISEBO3du1djxoxRQkKCcnNzFRgYKJfLpbS0NG+VBAAAABQLr4XowMBAxcfH\nKz4+3tq3bt06RUZGSpJCQkKUnp6uzMxMXXPNNTp9+rSOHDmi66+/3uPY1apVkNMZUCx1ulyVimUc\nABc2cOrK0i7hsvjivxX3PPeJV8e3e85261k24z5bz78YvtgneM/v/faFz56v87X36FLq8bW/314L\n0U6nU05nweFTU1MVGhpqbVevXl0pKSnq1auXXnjhBeXl5WnYsGEexz5+PKtYanS5KiklJaNYxgJw\ndfPHfyu8fc7FPT7/pvuXy+k3nxPPfPE9Kq2aigrvpfrFQmOMJCk0NFRTpkwpzVIAAACAi1ail7gL\nDg5WamqqtX306FG5XK6SLAEAAAC4bCUaosPDw5WYmChJSkpKUnBwsIKCgkqyBAAAAOCyeW05x86d\nOxUbG6vk5GQ5nU4lJiYqLi5OoaGhcrvdcjgciomJ8dbhAQAAAK/xWogOCwvT/PnzC+0fPny4tw4J\nAAAAlAhu+w0AAADYRIgGAAAAbCJEAwAAADYRogEAAACbCNEAAACATYRoAAAAwCaH+f3e2wAAAAAu\nCjPRAAAAgE2EaAAAAMAmQjQAAABgEyEaAAAAsIkQDQAAANhEiAYAAABscpZ2AaVl8uTJ2rZtmxwO\nh8aMGaMmTZqUdkkoZnv27NGgQYM0YMAA9e3bV4cPH9bIkSOVl5cnl8ull19+WYGBgaVdJorRtGnT\ntGXLFuXm5uqJJ57QLbfcQs+vUtnZ2YqOjlZaWppOnz6tQYMGqVGjRvT7Knfq1CndfffdGjRokFq3\nbk2/r1IbNmzQkCFDVL9+fUlSgwYN9Ne//tXn+u2XM9EbN27UgQMHlJCQoEmTJmnSpEmlXRKKWVZW\nliZMmKDWrVtb+2bOnKk+ffro/fff14033qglS5aUYoUobuvXr9cPP/yghIQE/f3vf9fkyZPp+VXs\nq6++UlhYmBYsWKDXXntNU6dOpd9+YM6cOapSpYok/k2/2rVq1Urz58/X/PnzNW7cOJ/st1+G6HXr\n1ikyMlKSFBISovT0dGVmZpZyVShOgYGBio+PV3BwsLVvw4YNuuuuuyRJHTt21Lp160qrPHjBbbfd\nptdff12SVLlyZWVnZ9Pzq1i3bt302GOPSZIOHz6smjVr0u+r3N69e/Xjjz+qQ4cOkvg33d/4Yr/9\nMkSnpqaqWrVq1nb16tWVkpJSihWhuDmdTpUvX77AvuzsbOtXPzVq1KDnV5mAgABVqFBBkrRkyRLd\neeed9NwPuN1uDR8+XGPGjKHfV7nY2FhFR0db2/T76vbjjz/qb3/7m3r37q21a9f6ZL/9dk30ubjz\nuf+h51evL7/8UkuWLNG8efPUuXNnaz89vzotWrRIu3bt0ogRIwr0mH5fXT7++GM1bdpUdevWPe/j\n9PvqUq9ePQ0ePFhRUVE6ePCgHnnkEeXl5VmP+0q//TJEBwcHKzU11do+evSoXC5XKVaEklChQg
Wd\nOnVK5cuX16+//lpgqQeuDmvWrNHcuXP197//XZUqVaLnV7GdO3eqRo0auv7669W4cWPl5eWpYsWK\n9PsqtWrVKh08eFCrVq3SkSNHFBgYyN/vq1jNmjXVrVs3SdINN9yga6+9Vjt27PC5fvvlco7w8HAl\nJiZKkpKSkhQcHKygoKBSrgre1qZNG6vvX3zxhdq1a1fKFaE4ZWRkaNq0aXrzzTdVtWpVSfT8arZ5\n82bNmzdP0tklellZWfT7Kvbaa69p6dKl+uCDD/TQQw9p0KBB9Psq9umnn+rtt9+WJKWkpCgtLU09\nevTwuX47jK/MiZew6dOna/PmzXI4HIqJiVGjRo1KuyQUo507dyo2NlbJyclyOp2qWbOmpk+frujo\naJ0+fVq1atXSlClTVLZs2dIuFcUkISFBcXFxuummm6x9U6dO1fPPP0/Pr0KnTp3S2LFjdfjwYZ06\ndUqDBw9WWFiYRo0aRb+vcnFxcapdu7batm1Lv69SmZmZGj58uH777TedOXNGgwcPVuPGjX2u334b\nogEAAIBL5ZfLOQAAAIDLQYgGAAAAbCJEAwAAADYRogEAAACbCNEAAACATYRoAD7r0KFDatiwoT79\n9NMC+yMiIopl/IYNGyo3N7dYxipKYmKi7rrrLi1evNirx7lSDRkyRA888ICOHDlSosddtmyZ8vPz\nJUn9+vUrcDc0ALgYhGgAPq1evXqaNWuWMjMzS7uUS7J69Wr95S9/0UMPPVTapfikL774QgsXLtR1\n111XoseNi4uzQvT8+fMVEBBQoscHcOXzy9t+A7hyBAcHq23btpo9e7ZGjhxZ4LEPP/xQ//vf/zR9\n+nRJZ2cUn3zySQUEBGju3Lm67rrrtGPHDt16661q2LChVqxYoRMnTig+Pt4KbXPnztX69et18uRJ\nxcbGqkGDBtq9e7diY2OVm5urM2fOaPz48br55pvVr18/NWrUSLt27dJ7771XIHitWrVKs2bNUvny\n5XXNNddowoQJ+uabb7R69Wpt2bJFAQEB6tWrl/X8ffv2KSYmRsYY5ebm6rnnnlPLli2Vlpam0aNH\nKyMjQwEBARo/frwaNGigxYsXa+HChSpbtqxuv/12DRs2TNHR0WrRooUV0Bs2bKikpCTNmTNHhw4d\n0i+//KJRo0apevXqevHFF5Wdna2srCwNGzZMbdq0UXR0tIKDg7Vnzx7t27dPPXv21GOPPaZTp05p\n9OjROnz4sCRp2LBhatWqldavX69Zs2bJGCOn06kJEyaobt26mj59utavX6/AwEDVrFlTsbGxCgwM\ntM41Ly9PkydPVlJSkiTpjjvu0NChQzV27Fjl5+frr3/9q6ZNm6ZatWpZr5k5c6ZWrFihmjVr6oYb\nbtBvv/2m6dOnKyIiQu+8845uvPFGbdiwQa+99poWLlyoX3755bznuHz5cr399tuqUKGCjDGaMmWK\nPvroIx04cEADBgzQG2+8odtvv11JSUnKycnRuHHjdOTIEeXm5uq+++5Tnz59rM9Zfn6+9u3bp9q1\naysuLk4Oh6M4P+oArjQGAHzUwYMHTd++fc3p06dNt27dzN69e40xxnTs2NEYY8zSpUvNc889Zz2/\nb9++Zu3atWb9+vWmefPm5vjx4+bUqVPmlltuMR999JExxphRo0aZd955xxhjTIMGDczy5cuNMcZ8\n8MEH5umnnzbGGHP33XebAwcOGGOM2bVrl3nggQes8V955ZVCdWZlZZnw8HBz+PBhY4wx8+fPN9HR\n0dbxPvjgg0KvGThwoHXs3bt3m4iICGOMMaNHjzYLFiwwxhizYcMGM23aNHPo0CETERFhsrOzrTH3\n7t1baOwGDRqYM2fOmJkzZ5o+ffqY/Px8Y4wxjz32mFm3bp0xxpijR4+ajh07mjNnzphRo0aZoUOH\nGmOMOXTokGnevLkxxpg33njDTJ061RhjzL59+8zw4cNNVl
aW6dy5szl+/LgxxpgVK1aYwYMHmxMn\nTpimTZua3NxcY4wxn332mUlOTi5wrsuWLTOPP/64yc/PN7m5uaZnz55mw4YNBWo+1759+0z79u1N\ndna2yc/PN3/729+sPnfs2NHs37/fGGPM+vXrjdvtvuA53nPPPebbb781xhjz7bffmk2bNhU67u8/\nz50717zwwgvGGGOys7NNx44dzc8//2yWLl1qvf/5+fnmrrvuMklJSYV6CsC/MBMNwOcFBgZq5MiR\nmjRpkt5+++2Lek1ISIiqVq0qSapataqaNWsmSapZs2aBpSHh4eGSpObNm2vevHlKS0vTvn37NHbs\nWOs5mZmZ1q/+mzdvXuhY+/fvV40aNazZ7VatWmnRokUXrG/btm169dVXJZ2dQc7MzNSxY8e0fft2\nPfroo9Y4rVq10r///W+FhoaqfPnyks7eztyTW2+91Zop3bBhg06ePKlZs2ZJkpxOp9LS0qxjSFLt\n2rWVmZmpvLw8bd++Xb1795Z0djnNyy+/rO3btyslJUVPP/20pLOzyw6HQ1WqVFG7du3Ut29fderU\nSd26dSu0NGPbtm1q3bq1HA6HAgIC1LJlS+3YscM69h99//33CgsLs843PDxc33777QXPt6hz7NGj\nh6Kjo9W5c2d17txZt956a5FjbNu2TT169JAklS9fXmFhYdbseZMmTax6rr/+eqWnp1+wHgBXP0I0\ngCtC+/bttXDhQq1YscLa98dfp585c8b6+Y9rXM/dNsZYP5cpU8ba53A4FBgYqLJly2r+/PnnraNs\n2bKF9v2xjt/HupDzPe5wOORwOKzAfu7+c2s+3xg5OTlF1hkYGKi4uDhVr1690BhOZ8H/DPxe+x9r\nCAwMVK1atc77vsycOVN79+7V6tWr1bdvX8XFxalx48ZFnqun9+eP51rUc8/td1HnOGDAAN19991a\ns2aNxo8fr4ceekhut/u8412ozj9+ns7XDwD+hS8WArhijBkzRjNmzLACY1BQkHVVh7S0NP3www+2\nx1y3bp0kaevWrWrQoIEqVaqkOnXqaPXq1ZLOrl1+4403LjhGvXr1lJaWpl9++cUa80IzntLZmeKv\nv/5akvTdd9+patWqqlatmpo1a6Y1a9ZIkjZv3qxRo0bplltu0fbt260Z9CFDhmjnzp2qWLGitW55\n3bp1RYbNFi1a6PPPP5ckHTt2TJMmTbpgbefWcOjQIfXv31/16tXT8ePHtWfPHknSpk2blJCQoIMH\nD+rdd99VSEiIBg4cqE6dOmn37t0FxmvatKn+97//Weu/N27ceMH3p379+kpKStLp06dljLF6IZ3t\n+e/nvH79+gueY15enqZPn65KlSrpgQce0NNPP61t27ZJOhuY/3hllltvvdU676ysLCUlJSk0NPSC\n7xUA/8VMNIArxg033KAuXbpo7ty5ks7+mv/tt9/Www8/rJCQEGvJxsUKCAjQDz/8oEWLFun48eN6\n+eWXJUmxsbGaOHGi3nrrLeXm5io6OvqC45QvX16TJk3Ss88+q8DAQFWoUMFjUB03bpxiYmK0cOFC\n5ebmatq0aZLOBuTRo0frq6++sp5Xq1YtDR48WAMGDJDT6VTz5s0VFhYmp9OpIUOGaNOmTWrbtq0q\nVap03mONHTtW48eP12effaacnBw9+eSTF6ytX79+GjdunPr06aP8/HwNHTpU5cuX18svv6yxY8eq\nXLlykqSXXnpJNWvW1HfffaeePXuqYsWKqlKligYPHlxgvK5du2rr1q3q3bu38vPzFRkZqRYtWhR5\n/JCQEHXv3l09e/aUy+VSnTp1lJWVJUkaOHCgxo4dq3r16hVYWnO+cwwICFC1atXkdrtVuXJlSdLz\nzz8vSWrXrp0efPBBzZkzp9B5//nPf1ZOTo4GDRqkOnXqaOPGjRd8vwD4J4fhd1IAAB/2x6uwAIAv\nYDkHAAAAYBMz0QAAAI
BNzEQDAAAANhGiAQAAAJsI0QAAAIBNhGgAAADAJkI0AAAAYBMhGgAAALDp\n/wMXgTcGTOO7GAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 864x360 with 1 Axes>"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "rMwa-CV4tr9c"
},
"source": [
"### Dropping the Null values\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "xscdzLB0JSjd",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 508
},
"outputId": "43f34e5a-0917-4823-e27f-cba57f9b604c"
},
"source": [
"# Checking the null values\n",
"null = df.isnull().sum()\n",
"print(null)\n",
"# Removing the few rows with null question text won't affect the data much\n",
"df = df.dropna(subset = ['question1', 'question2'])\n",
"\n",
"pd.value_counts(df['is_duplicate']).plot.bar()\n",
"plt.xlabel('Values')\n",
"plt.ylabel('Count')\n",
"plt.title('Class Distribution')"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"id 0\n",
"qid1 0\n",
"qid2 0\n",
"question1 1\n",
"question2 2\n",
"is_duplicate 0\n",
"dtype: int64\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Class Distribution')"
]
},
"metadata": {
"tags": []
},
"execution_count": 8
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgMAAAFjCAYAAACgxwiQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X9Q1Pedx/HXLrBFzqWylLWnScyg\nRj0jGGKigsSCEn/Ua/MDjXBo70LutMVMTOgYjljEJEZiY/OjksbEWKk2QkKnKU0IGCN6yUHpGVKi\n3HlG6zWoGdhtIChiWGXvj0x3QlRE47LC5/mYyYx89rtf3t9WJs98P7uLxev1egUAAIxlDfQAAAAg\nsIgBAAAMRwwAAGA4YgAAAMMRAwAAGI4YAADAcMQA0E95vV798pe/1Lx58zRr1izNnDlT+fn5OnHi\nhCQpJydHzz//vF9nyMnJ0ZQpUzR79mwlJSXpe9/7noqKitTV1SVJ+vDDD5WZmdnjOdxut955553z\nPtbU1KR58+ZJkn7+85/rkUceueQZX331Vd+ff/CDH6ihoeGSzwEMdMQA0E899dRTKi8v18svv6zK\nykqVlZXJ4/FoyZIl6suPD1m8eLEqKipUVVWln//859qxY4fy8/MlSTExMXr55Zd7fH5tba127dp1\n3seGDh2qN95447JnO3v2rNatW+f7uqioSOPHj7/s8wEDFTEA9EOtra3aunWrCgoKNHToUElSWFiY\n8vLydN99950TAx988IHuuusuzZ49W3PnzlV1dbUk6cyZM3rkkUc0a9YspaSkaNmyZTp58uQF1y9m\nxIgRev755/Xmm2/qz3/+s2pra5WSkiJJOnjwoO655x5997vf1e23365t27apoaFBjz76qCorK/Xg\ngw/q6NGjmjZtmp544gllZGTo6NGj+od/+Aff+dvb27VkyRIlJydr0aJFcrvdkqTk5GTt3bvXd9zf\nvv6Xf/kXnThxQrNnz1ZjY2O349566y3NmzdPs2fP1uLFi/Xxxx9L+uIOxKOPPqqsrCzNmDFDqamp\nam5uvtz/q4B+gRgA+qH6+np9+9vf1siRI7utf+Mb31BycrKs1u4/2nl5ecrMzFRFRYX+7d/+TatW\nrZIkvffeezp69KgqKiq0Y8cOjRo1Sh988MEF13vjm9/8puLi4vTHP/6x2/qGDRu0cOFCvfnmmyou\nLlZ1dbVGjx6tjIwMzZo1S08//bSkL0Jn3Lhx2rZt2znn/o//+A+tXLlSu3bt0tChQ/Xiiy/2OMsT\nTzyhoKAgVVRU6Nprr/WtHz9+XD/5yU9UWFioiooKfec731FeXp7v8YqKCuXm5mrnzp2KjIzUb37z\nm15dO9BfEQNAP9Ta2qrIyMheH//6669rzpw5kqSbb75ZjY2NkiSHw6HDhw/r7bffVkdHh5YvX67E\nxMQLrvfW4MGDfa9d+JvIyEhVVlaqoaFBERERev7552Wz2c55rsfj8d1N+Kqbb77Z9y/12bNn609/\n+lOvZ/qy//zP/9TkyZM1YsQISdL8+fNVW1urM2fOSJImTZqk4cOHy2KxaNy4cfrkk08u6/sA/QUx\nAPRDERERampq6vXxv//975WamqpZs2bp3nvv9W0jxMTEaOXKldq6dasSEhKUnZ2ttra2C6731rFj\nx86JlR//+Me64YYbtHz5ck2fPl2//vWvz/vcoKAgDR48+LyPORwO35/tdrs+++yzXs/0ZS0tLQoP\nD+92Lq/Xq5aWFt/XX57n7Nmzl/V9gP6CGAD6oYkTJ+qvf/3rOa+M93g8evrpp9XR0eFba2pq0sqV\nK7VmzRpVVlbqpZde6vac2bNna+vWraqqqlJHR4fvBX8XWr+YxsZG/e///q+mTJnSbf3v/u7v9NBD\nD+ntt9/Whg0b9Nxzz+nIkSOXdN1f/pd/W1ub
hgwZIkmyWq2+dzB89bjziYyMVGtra7fjrVarIiIi\nLmkeYKAgBoB+KDw8XPfdd58efvhh/eUvf5EkdXR0KC8vT//93/+tQYMG+Y799NNPFRYWpujoaJ05\nc0YlJSWSvngx3m9+8xsVFhZKkoYMGaLo6GhJuuD6xRw9elQPPfSQ0tPTNWzYsG6PLV26VB999JEk\n6YYbbtDgwYNlsVgUHBx8zpbChbz//vs6fvy4pC/29W+++WZJUlRUlA4cOCBJKi8v1+effy5JCgkJ\nUVdX1zkvfkxISNDevXt92yXFxcVKSEhQcHBwr+YABhr+5gP91P33369vfvOb+uEPf6izZ8/KarVq\nxowZvrf1/c3YsWN12223adasWYqMjFROTo7q6uq0aNEibd68Wbm5ubr99tsVFBSkESNGqKCgQJIu\nuP5Vv/rVr1RWVqbOzk6FhIRo4cKF+ud//udzjsvIyFB2drY8Ho8kKT09Xddff70SEhL0y1/+Unff\nfbeeffbZHq85OTlZjz32mA4ePKhrrrnG97kDP/rRj7Rq1Sq9+uqrmjVrlkaNGiXpi0i4+eablZSU\npI0bN/rO8+1vf1uPP/64fvSjH8nj8eiaa67RY4891qv/3YGByOLtyzckAwCAqw7bBAAAGI4YAADA\ncMQAAACGIwYAADAcMQAAgOGMfWuhy9W79zXj6hQREaaWllOBHgMwDj97/VdUlP2Cj3FnAP1ScHBQ\noEcAjMTP3sBEDAAAYDhiAAAAwxEDAAAYjhgAAMBwxAAAAIYjBgAAMBwxAACA4YgBAAAMRwwAAGA4\nYgAAAMMRAwAAGI4YAADAcMb+1sKB7t6CXYEeAZdpc05yoEcAYBjuDAAAYDi/3hlYt26d3n//fZ05\nc0ZLlizRrl271NDQoCFDhkiSMjMz9Z3vfEdlZWUqKiqS1WrVggULNH/+fHk8HuXk5Oj48eMKCgrS\n2rVrde211+rAgQPKz8+XJI0ZM0arV6+WJG3atEkVFRWyWCxatmyZpk+f7s9LAwBgwPBbDPzhD3/Q\nRx99pJKSErW0tOjOO+/UlClT9NBDDykpKcl33KlTp1RYWKjS0lKFhIQoNTVVKSkpqqqqUnh4uNav\nX6/33ntP69ev1zPPPKM1a9YoNzdXMTExys7O1p49exQdHa3y8nIVFxfr5MmTSk9P17Rp0xQUxO/d\nBgDgYvy2TXDLLbfo2WeflSSFh4ero6NDZ8+ePee4+vp6TZgwQXa7XaGhoYqLi1NdXZ1qamqUkpIi\nSYqPj1ddXZ06Ozt17NgxxcTESJKSkpJUU1Oj2tpaJSYmymazyeFwaPjw4Tp06JC/Lg0AgAHFbzEQ\nFBSksLAwSVJpaaluu+02BQUFadu2bVq8eLEefPBBffrpp3K73XI4HL7nORwOuVyubutWq1UWi0Vu\nt1vh4eG+YyMjI8859svnAAAAF+f3dxPs3LlTpaWl2rx5s/bv368hQ4Zo3LhxevHFF7VhwwbddNNN\n3Y73er3nPc/51i/l2K+KiAhTcDDbCLj6REXZAz0C0CP+jg48fo2Bd999Vy+88II2bdoku92uqVOn\n+h5LTk5Wfn6+Zs2aJbfb7Vtvbm7WxIkT5XQ65XK5NHbsWHk8Hnm9XkVFRam1tdV3bFNTk5xOp5xO\np44cOXLOek9aWk5dwSsFrhyX60SgRwAuKCrKzt/RfqqniPPbNsGJEye0bt06bdy40ffugfvvv1+N\njY2SpNraWo0ePVqxsbHat2+f2tra1N7errq6Ok2aNEkJCQmqqKiQJFVVVWny5MkKCQlRdHS09u7d\nK0nasWOHEhMTNWXKFO3evVudnZ1qampSc3OzRo0a5a9LAwBgQPHbnYHy8nK1tLRo+fLlvrW77rpL\ny5cv16BBgxQWFqa1a9cqNDRU2dnZyszMlMViUVZWlux2u+bOnavq6mqlpaXJZrOpoKBAkpSbm6u8\nvDx1dXUp
NjZW8fHxkqQFCxYoIyNDFotF+fn5slr5CAUAAHrD4u3NBvsANNBvc/EJhP0Xn0CIqxnb\nBP1XQLYJAABA/0AMAABgOGIAAADDEQMAABiOGAAAwHDEAAAAhiMGAAAwHDEAAIDhiAEAAAxHDAAA\nYDhiAAAAwxEDAAAYjhgAAMBwxAAAAIYjBgAAMBwxAACA4YgBAAAMRwwAAGA4YgAAAMMRAwAAGI4Y\nAADAcMQAAACGIwYAADAcMQAAgOGIAQAADEcMAABgOGIAAADDEQMAABiOGAAAwHDEAAAAhiMGAAAw\nHDEAAIDhiAEAAAxHDAAAYDhiAAAAwxEDAAAYjhgAAMBwxAAAAIYjBgAAMBwxAACA4YgBAAAMRwwA\nAGA4YgAAAMMRAwAAGI4YAADAcMQAAACGIwYAADBcsD9Pvm7dOr3//vs6c+aMlixZogkTJmjFihU6\ne/asoqKi9NOf/lQ2m01lZWUqKiqS1WrVggULNH/+fHk8HuXk5Oj48eMKCgrS2rVrde211+rAgQPK\nz8+XJI0ZM0arV6+WJG3atEkVFRWyWCxatmyZpk+f7s9LAwBgwPBbDPzhD3/QRx99pJKSErW0tOjO\nO+/U1KlTlZ6erjlz5uhnP/uZSktLdccdd6iwsFClpaUKCQlRamqqUlJSVFVVpfDwcK1fv17vvfee\n1q9fr2eeeUZr1qxRbm6uYmJilJ2drT179ig6Olrl5eUqLi7WyZMnlZ6ermnTpikoKMhflwcAwIDh\nt22CW265Rc8++6wkKTw8XB0dHaqtrdWMGTMkSUlJSaqpqVF9fb0mTJggu92u0NBQxcXFqa6uTjU1\nNUpJSZEkxcfHq66uTp2dnTp27JhiYmK6naO2tlaJiYmy2WxyOBwaPny4Dh065K9LAwBgQPFbDAQF\nBSksLEySVFpaqttuu00dHR2y2WySpMjISLlcLrndbjkcDt/zHA7HOetWq1UWi0Vut1vh4eG+Yy92\nDgAAcHF+fc2AJO3cuVOlpaXavHmzbr/9dt+61+s97/GXsn6p5/iyiIgwBQezjYCrT1SUPdAjAD3i\n7+jA49cYePfdd/XCCy9o06ZNstvtCgsL0+nTpxUaGqqmpiY5nU45nU653W7fc5qbmzVx4kQ5nU65\nXC6NHTtWHo9HXq9XUVFRam1t9R375XMcOXLknPWetLScuvIXDFwBLteJQI8AXFBUlJ2/o/1UTxHn\nt22CEydOaN26ddq4caOGDBki6Yu9/8rKSknSjh07lJiYqNjYWO3bt09tbW1qb29XXV2dJk2apISE\nBFVUVEiSqqqqNHnyZIWEhCg6Olp79+7tdo4pU6Zo9+7d6uzsVFNTk5qbmzVq1Ch/XRoAAAOK3+4M\nlJeXq6WlRcuXL/etFRQUaOXKlSopKdGwYcN0xx13KCQkRNnZ2crMzJTFYlFWVpbsdrvmzp2r6upq\npaWlyWazqaCgQJKUm5urvLw8dXV1KTY2VvHx8ZKkBQsWKCMjQxaLRfn5+bJa+QgFAAB6w+LtzQb7\nADTQb3PdW7Ar0CPgMm3OSQ70CMAFsU3QfwVkmwAAAPQPxAAAAIYjBgAAMBwxAACA4YgBAAAMRwwA\nAGA4YgAAAMMRAwAAGI4YAADAcMQAAACGIwYAADAcMQAAgOGIAQAADEcMAABgOGIAAADDEQMAABiO\nGAAAwHDEAAAAhiMGAAAwHDEAAIDhiAEAAAxHDAAAYDhiAAAAwxEDAAAYjhgAAMBwxAAAAIYjBgAA\nMBwxAACA4YgBAAAMRwwAAGA4YgAAAMMRAwAAGI4YAADAcMQAAACGIwYAADAcMQAAgOGIAQAADEcM\nAABgOGIAAADDEQMAABiOGAAAwHDEAAAAhiMGAAAwHDEAAIDhiAEAAAxHDAAAYDhiAAAAw/k1Bg4e\nPKiZM2dq27ZtkqScnBz94z/+oxYtWqRFixZp9+7dkqSysjLdfffdmj9/vl
577TVJksfjUXZ2ttLS\n0pSRkaHGxkZJ0oEDB7Rw4UItXLhQq1at8n2vTZs2KTU1VfPnz9eePXv8eVkAAAwowf468alTp/TY\nY49p6tSp3dYfeughJSUldTuusLBQpaWlCgkJUWpqqlJSUlRVVaXw8HCtX79e7733ntavX69nnnlG\na9asUW5urmJiYpSdna09e/YoOjpa5eXlKi4u1smTJ5Wenq5p06YpKCjIX5cHAMCA4bc7AzabTS+9\n9JKcTmePx9XX12vChAmy2+0KDQ1VXFyc6urqVFNTo5SUFElSfHy86urq1NnZqWPHjikmJkaSlJSU\npJqaGtXW1ioxMVE2m00Oh0PDhw/XoUOH/HVpAAAMKH6LgeDgYIWGhp6zvm3bNi1evFgPPvigPv30\nU7ndbjkcDt/jDodDLper27rVapXFYpHb7VZ4eLjv2MjIyHOO/fI5AADAxfltm+B8vv/972vIkCEa\nN26cXnzxRW3YsEE33XRTt2O8Xu95n3u+9Us59qsiIsIUHMw2Aq4+UVH2QI+Ar2FByQ8DPQK+hlfv\n+UWgRwiIPo2BL79+IDk5Wfn5+Zo1a5bcbrdvvbm5WRMnTpTT6ZTL5dLYsWPl8Xjk9XoVFRWl1tZW\n37FNTU1yOp1yOp06cuTIOes9aWk5dQWvDLhyXK4TgR4BMNZA/vnr6T80+vSthffff7/vXQG1tbUa\nPXq0YmNjtW/fPrW1tam9vV11dXWaNGmSEhISVFFRIUmqqqrS5MmTFRISoujoaO3du1eStGPHDiUm\nJmrKlCnavXu3Ojs71dTUpObmZo0aNaovLw0AgH7Lb3cG9u/fryeffFLHjh1TcHCwKisrlZGRoeXL\nl2vQoEEKCwvT2rVrFRoaquzsbGVmZspisSgrK0t2u11z585VdXW10tLSZLPZVFBQIEnKzc1VXl6e\nurq6FBsbq/j4eEnSggULlJGRIYvFovz8fFmtfIQCAAC9YfH2YoP98OHDGjlyZLe1P/3pT5o4caLf\nBvO3gXwrSJLuLdgV6BFwmTbnJAd6BHwNWbtWBHoEfA2FyesCPYLfXPY2QVtbmz7++GPl5uaqsbHR\n98+f//xnPfzww1d8UAAA0Pd63Cb44IMPVFRUpP/5n//RD37wA9+61WrVtGnT/D4cAADwvx5jYPr0\n6Zo+fbq2b9+utLS0vpoJAAD0oV69gHDmzJkqKirSZ5991u09/A888IDfBgMAAH2jVy+5X7JkiQ4c\nOCCr1aqgoCDfPwAAoP/r1Z2Bv70NEAAADDy9ujMQGxurw4cP+3sWAAAQAL26M/Duu+9qy5YtioiI\nUHBwsLxerywWi3bv3u3n8QAAgL/1KgZ+8Qszf3EDAAAm6FUM1NTUnHc9NTX1ig4DAAD6Xq9i4P33\n3/f9ubOzUx9++KHi4uKIAQAABoBexcBX30nQ0dGhf//3f/fLQAAAoG9d1q/2GzRokD7++OMrPQsA\nAAiAXt0ZSE9Pl8Vi8X3d1NSkMWPG+G0oAADQd3oVA8uXL/f92WKxaPDgwRo7dqzfhgIAAH2nV9sE\nt956q6xWqxoaGtTQ0KDTp093u1MAAAD6r17FwLPPPqt169apublZTU1Nevzxx7Vx40Z/zwYAAPpA\nr7YJamtrVVxcLKv1i3Y4c+aMMjIytGTJEr8OBwAA/K9Xdwa6urp8ISBJwcHBbBMAADBA9OrOwI03\n3qilS5cqPj5eklRdXa0bb7zRr4MBAIC+cdEYaGxsVG5urt566y3V19fLYrFo0qRJuu+++/piPgAA\n4Gc9bhPU1NQoLS1N7e3t+u53v6vc3Fzddddd2r59u/bv399XMwIAAD/qMQY2bNigzZs3y263+9bG\njBmjF154Qc8884zfhwMAAP7XYwx4vV7dcMMN56yPHj1an3/+ud+GAgAAfafHGDh16tQFH2ttbb3i\nwwAAgL7XYwyMHj1a27dvP2f9pZdeUm
xsrN+GAgAAfafHdxOsWLFCWVlZ+t3vfqcbb7xRXV1dqqur\n0+DBg/kEQgAABogeYyAqKkqvvvqqampq9NFHHykoKEhz5szRLbfc0lfzAQAAP+vVhw5NnTpVU6dO\n9fcsAAAgAHr1ccQAAGDgIgYAADAcMQAAgOGIAQAADEcMAABgOGIAAADDEQMAABiOGAAAwHDEAAAA\nhiMGAAAwHDEAAIDhiAEAAAxHDAAAYDhiAAAAwxEDAAAYjhgAAMBwxAAAAIYjBgAAMJxfY+DgwYOa\nOXOmtm3bJkn65JNPtGjRIqWnp+uBBx5QZ2enJKmsrEx333235s+fr9dee02S5PF4lJ2drbS0NGVk\nZKixsVGSdODAAS1cuFALFy7UqlWrfN9r06ZNSk1N1fz587Vnzx5/XhYAAAOK32Lg1KlTeuyxxzR1\n6lTf2nPPPaf09HS98sorGjFihEpLS3Xq1CkVFhZqy5Yt2rp1q4qKitTa2qo33nhD4eHh2r59u5Yu\nXar169dLktasWaPc3FwVFxfr5MmT2rNnjxobG1VeXq5XXnlFGzdu1Nq1a3X27Fl/XRoAAAOK32LA\nZrPppZdektPp9K3V1tZqxowZkqSkpCTV1NSovr5eEyZMkN1uV2hoqOLi4lRXV6eamhqlpKRIkuLj\n41VXV6fOzk4dO3ZMMTEx3c5RW1urxMRE2Ww2ORwODR8+XIcOHfLXpQEAMKD4LQaCg4MVGhraba2j\no0M2m02SFBkZKZfLJbfbLYfD4TvG4XCcs261WmWxWOR2uxUeHu479mLnAAAAFxccqG/s9Xq/9vql\nnuPLIiLCFBwcdNHjgL4WFWUP9AiAsUz9+evTGAgLC9Pp06cVGhqqpqYmOZ1OOZ1Oud1u3zHNzc2a\nOHGinE6nXC6Xxo4dK4/HI6/Xq6ioKLW2tvqO/fI5jhw5cs56T1paTl35CwSuAJfrRKBHAIw1kH/+\negqdPn1rYXx8vCorKyVJO3bsUGJiomJjY7Vv3z61tbWpvb1ddXV1mjRpkhISElRRUSFJqqqq0uTJ\nkxUSEqLo6Gjt3bu32zmmTJmi3bt3q7OzU01NTWpubtaoUaP68tIAAOi3/HZnYP/+/XryySd17Ngx\nBQcHq7KyUk899ZRycnJUUlKiYcOG6Y477lBISIiys7OVmZkpi8WirKws2e12zZ07V9XV1UpLS5PN\nZlNBQYEkKTc3V3l5eerq6lJsbKzi4+MlSQsWLFBGRoYsFovy8/NltfIRCgAA9IbF25sN9gFoIN8K\nkqR7C3YFegRcps05yYEeAV9D1q4VgR4BX0Nh8rpAj+A3V802AQAAuPoQAwAAGI4YAADAcMQAAACG\nIwYAADAcMQAAgOGIAQAADEcMAABgOGIAAADDEQMAABiOGAAAwHDEAAAAhiMGAAAwHDEAAIDhiAEA\nAAxHDAAAYDhiAAAAwxEDAAAYjhgAAMBwxAAAAIYjBgAAMBwxAACA4YgBAAAMRwwAAGA4YgAAAMMR\nAwAAGI4YAADAcMQAAACGIwYAADAcMQAAgOGIAQAADEcMAABgOGIAAADDEQMAABiOGAAAwHDEAAAA\nhiMGAAAwHDEAAIDhiAEAAAxHDAAAYDhiAAAAwxEDAAAYjhgAAMBwxAAAAIYjBgAAMBwxAACA4YL7\n8pvV1tbqgQce0OjRoyVJN9xwg+677z6tWLFCZ8+eVVRUlH7605/KZrOprKxMRUVFslqtWrBggebP\nny+Px6OcnBwdP35cQUFBWrt2ra699lodOHBA+fn5kqQxY8Zo9erVfXlZAAD0a31+Z+DWW2/V1q1b\ntXXrVv3kJz/Rc889p/T0dL3yyisaMWKESktLderUKRUWFmrLli3aunWrioqK1NraqjfeeEPh4eHa\nvn27li5dqvXr10uS1qxZo9zcXBUXF+vkyZPas2dPX18WAAD9VsC3CWprazVjxgxJUlJSkmpqalRf\nX6
8JEybIbrcrNDRUcXFxqqurU01NjVJSUiRJ8fHxqqurU2dnp44dO6aYmJhu5wAAAL3Tp9sEknTo\n0CEtXbpUn332mZYtW6aOjg7ZbDZJUmRkpFwul9xutxwOh+85DofjnHWr1SqLxSK3263w8HDfsX87\nBwAA6J0+jYHrr79ey5Yt05w5c9TY2KjFixfr7Nmzvse9Xu95n3cp6xc69qsiIsIUHBzUq2OBvhQV\nZQ/0CICxTP3569MYGDp0qObOnStJuu666/Stb31L+/bt0+nTpxUaGqqmpiY5nU45nU653W7f85qb\nmzVx4kQ5nU65XC6NHTtWHo9HXq9XUVFRam1t9R37t3NcTEvLqSt/gcAV4HKdCPQIgLEG8s9fT6HT\np68ZKCsr08svvyxJcrlc+utf/6q77rpLlZWVkqQdO3YoMTFRsbGx2rdvn9ra2tTe3q66ujpNmjRJ\nCQkJqqiokCRVVVVp8uTJCgkJUXR0tPbu3dvtHAAAoHf69M5AcnKyfvzjH+udd96Rx+NRfn6+xo0b\np4cfflglJSUaNmyY7rjjDoWEhCg7O1uZmZmyWCzKysqS3W7X3LlzVV1drbS0NNlsNhUUFEiScnNz\nlZeXp66uLsXGxio+Pr4vLwsAgH7N4u3tJvsAM5BvBUnSvQW7Aj0CLtPmnORAj4CvIWvXikCPgK+h\nMHldoEfwm6tmmwAAAFx9iAEAAAxHDAAAYDhiAAAAwxEDAAAYjhgAAMBwxAAAAIYjBgAAMBwxAACA\n4YgBAAAMRwwAAGA4YgAAAMMRAwAAGI4YAADAcMQAAACGIwYAADAcMQAAgOGIAQAADEcMAABgOGIA\nAADDEQMAABiOGAAAwHDEAAAAhiMGAAAwHDEAAIDhiAEAAAxHDAAAYDhiAAAAwxEDAAAYjhgAAMBw\nxAAAAIYjBgAAMBwxAACA4YgBAAAMRwwAAGA4YgAAAMMRAwAAGI4YAADAcMQAAACGIwYAADAcMQAA\ngOGIAQAADEcMAABgOGIAAADDEQMAABiOGAAAwHDEAAAAhgsO9ABX0hNPPKH6+npZLBbl5uYqJiYm\n0CMBAHDVGzAx8Mc//lF/+ctfVFJSosOHDys3N1clJSWBHgsAgKvegNkmqKmp0cyZMyVJI0eO1Gef\nfaaTJ08GeCoAAK5+AyYG3G63IiIifF87HA65XK4ATgQAQP8wYLYJvsrr9fb4eFSUvY8mCYzfr/9+\noEcAjPTqPb8I9AjAJRswdwacTqfcbrfv6+bmZkVFRQVwIgAA+ocBEwMJCQmqrKyUJDU0NMjpdGrw\n4MEBngoAgKvfgNkmiIuL0/jx47Vw4UJZLBatWrUq0CMBANAvWLwX21wHAAAD2oDZJgAAAJeHGAAA\nwHDEAAAAhhswLyDEwNbe3u4vDkR7AAAEDklEQVR762hUVJTCwsICPBFgrra2NoWHhwd6DFxBvIAQ\nV7V9+/ZpzZo1amtrU0REhLxer5qbmzV06FDl5eVpzJgxgR4RMM7ixYv1q1/9KtBj4ArizgCuak88\n8YTWrFmjkSNHdltvaGjQo48+ql//+tcBmgwY2Hr62WpqaurDSdAXiAFc1bxe7zkhIEnjx4/X2bNn\nAzARYIYtW7Zo6tSpcjqd5zx25syZAEwEfyIGcFWLjY3V0qVLNXPmTDkcDklf/FKqyspK3XrrrQGe\nDhi4CgsL9fjjj2vlypWy2WzdHqutrQ3QVPAXXjOAq95//dd/qaamxvcCQqfTqYSEBN10000BngwY\n2Do6OvSNb3xDVmv3N541NDRo/PjxAZoK/kAMAABgOD5nAAAAwxEDAAAYjhgA0Gv/9E//pJ07d3Zb\nO336tG655RZ98skn533OokWLVF1d3RfjAbhMxACAXktNTdXrr7/ebe3tt99WbGys/v7v/z5AUwH4\nuogBAL02e/Zs7d27Vy0tLb61119/XampqXr77bd1zz33aNGiRUpP
T9fRo0e7Pbe2tlZpaWm+r3Ny\ncvTaa69JksrLy5Wenq60tDRlZWWppaVFZ86cUU5Oju655x4tXLhQq1ev7puLBAxEDADotUGDBun2\n22/Xm2++KUlqbm7WgQMHlJycrLa2Nj399NPaunWrpk+f3utPh/zkk0/0wgsvaMuWLdq+fbtuvfVW\nbdy4UQcPHlR9fb1KSkpUXFyscePG6cSJE/68PMBYfOgQgEuSmpqq1atXKyMjQ2VlZZo3b55sNpu+\n9a1v6eGHH5bX65XL5er150B88MEHcrlcyszMlCR1dnbqmmuu0ciRIxUREaF//dd/VVJSkubMmSO7\n3e7PSwOMRQwAuCQxMTHq7OzU4cOH9bvf/U4/+9nP5PF4tHz5cv32t7/V9ddfr23btmn//v3dnmex\nWLp97fF4JEk2m00xMTHauHHjOd/rlVdeUUNDg6qqqpSamqrt27ef9+NxAXw9bBMAuGR33323nn/+\neQ0aNEijR49We3u7rFarhg8frs8//1zvvPOOOjs7uz1n8ODBampqktfrVUdHh+rr6yVJEyZM0Icf\nfiiXyyVJeuutt7Rz507t27dPv/3tbzV+/HgtW7ZM48eP1//93//19aUCRuDOAIBL9r3vfU9PPfWU\n8vLyJElDhgzRvHnzlJqaqmHDhikzM1MrVqzQW2+95XvO2LFjNWbMGN1555267rrrfNsIQ4cO1SOP\nPKIlS5Zo0KBBCg0N1ZNPPqmQkBAVFhaqpKRENptN1113neLi4gJyvcBAx8cRAwBgOLYJAAAwHDEA\nAIDhiAEAAAxHDAAAYDhiAAAAwxEDAAAYjhgAAMBwxAAAAIb7f6VN0ynBHtf/AAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 576x396 with 1 Axes>"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "O7zpuLthujTw"
},
"source": [
"## Using Universal Sentence Encoder - For creating embedding of text at sentence level"
]
},
{
"cell_type": "code",
"metadata": {
"id": "eDvXeP3bAi09",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"outputId": "a8240672-0242-4d95-8e30-29f8752277e9"
},
"source": [
"import tensorflow as tf\n",
"import tensorflow_hub as hub\n",
"# enabling the pretrained model for training our custom model using tensorflow hub\n",
"module_url = \"https://tfhub.dev/google/universal-sentence-encoder-large/3\"\n",
"embed = hub.Module(module_url)\n",
"\n",
"# creating a function for embedding; it will be used for every input layer\n",
"def UniversalEmbedding(x):\n",
" return embed(tf.squeeze(tf.cast(x, tf.string)), signature=\"default\", as_dict=True)[\"default\"]\n",
" \n"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"INFO:tensorflow:Using /tmp/tfhub_modules to cache modules.\n",
"INFO:tensorflow:Downloading TF-Hub Module 'https://tfhub.dev/google/universal-sentence-encoder-large/3'.\n",
"INFO:tensorflow:Downloaded https://tfhub.dev/google/universal-sentence-encoder-large/3, Total size: 810.60MB\n",
"INFO:tensorflow:Downloaded TF-Hub Module 'https://tfhub.dev/google/universal-sentence-encoder-large/3'.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "9ywhyFN5GgGa"
},
"source": [
"DROPOUT = 0.1"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "WbXVIhUHuuXR"
},
"source": [
"### Building the neural network"
]
},
{
"cell_type": "code",
"metadata": {
"id": "shVOz9OrEiXg",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 799
},
"outputId": "7bf83bbd-a395-4ea0-c363-ba51a906d30f"
},
"source": [
"# Taking question1 as input and creating an embedding for each question before feeding it to the neural network\n",
"q1 = layers.Input(shape=(1,), dtype=tf.string)\n",
"embedding_q1 = layers.Lambda(UniversalEmbedding, output_shape=(512,))(q1)\n",
"# Taking the question2 and doing the same thing mentioned above, using the lambda function\n",
"q2 = layers.Input(shape=(1,), dtype=tf.string)\n",
"embedding_q2 = layers.Lambda(UniversalEmbedding, output_shape=(512,))(q2)\n",
"\n",
"# Concatenating both embedding layers\n",
"merged = layers.concatenate([embedding_q1, embedding_q2])\n",
"merged = layers.Dense(200, activation='relu')(merged)\n",
"merged = layers.Dropout(DROPOUT)(merged)\n",
"\n",
"# Normalizing the input layer, applying dense and dropout layers for a fully connected model and to avoid overfitting \n",
"merged = layers.BatchNormalization()(merged)\n",
"merged = layers.Dense(200, activation='relu')(merged)\n",
"merged = layers.Dropout(DROPOUT)(merged)\n",
"\n",
"merged = layers.BatchNormalization()(merged)\n",
"merged = layers.Dense(200, activation='relu')(merged)\n",
"merged = layers.Dropout(DROPOUT)(merged)\n",
"\n",
"merged = layers.BatchNormalization()(merged)\n",
"merged = layers.Dense(200, activation='relu')(merged)\n",
"merged = layers.Dropout(DROPOUT)(merged)\n",
"\n",
"# Using the Sigmoid as the activation function and binary crossentropy for binary classification as 0 or 1\n",
"merged = layers.BatchNormalization()(merged)\n",
"pred = layers.Dense(2, activation='sigmoid')(merged)\n",
"model = Model(inputs=[q1,q2], outputs=pred)\n",
"model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
"model.summary()"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n",
"INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n",
"__________________________________________________________________________________________________\n",
"Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
"input_1 (InputLayer) (None, 1) 0 \n",
"__________________________________________________________________________________________________\n",
"input_2 (InputLayer) (None, 1) 0 \n",
"__________________________________________________________________________________________________\n",
"lambda_1 (Lambda) (None, 512) 0 input_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"lambda_2 (Lambda) (None, 512) 0 input_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"concatenate_1 (Concatenate) (None, 1024) 0 lambda_1[0][0] \n",
" lambda_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"dense_1 (Dense) (None, 200) 205000 concatenate_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"dropout_1 (Dropout) (None, 200) 0 dense_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_1 (BatchNor (None, 200) 800 dropout_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"dense_2 (Dense) (None, 200) 40200 batch_normalization_1[0][0] \n",
"__________________________________________________________________________________________________\n",
"dropout_2 (Dropout) (None, 200) 0 dense_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_2 (BatchNor (None, 200) 800 dropout_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"dense_3 (Dense) (None, 200) 40200 batch_normalization_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"dropout_3 (Dropout) (None, 200) 0 dense_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_3 (BatchNor (None, 200) 800 dropout_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"dense_4 (Dense) (None, 200) 40200 batch_normalization_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"dropout_4 (Dropout) (None, 200) 0 dense_4[0][0] \n",
"__________________________________________________________________________________________________\n",
"batch_normalization_4 (BatchNor (None, 200) 800 dropout_4[0][0] \n",
"__________________________________________________________________________________________________\n",
"dense_5 (Dense) (None, 2) 402 batch_normalization_4[0][0] \n",
"==================================================================================================\n",
"Total params: 329,202\n",
"Trainable params: 327,602\n",
"Non-trainable params: 1,600\n",
"__________________________________________________________________________________________________\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "in7LCAt5pX_Z"
},
"source": [
"# Pushing all the strings to a list and converting to ndarray\n",
"q1= df.question1.tolist()\n",
"q2= df.question2.tolist()\n",
"labels= df.is_duplicate.tolist()"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "1zvie2yku3pD"
},
"source": [
"### Preparing the train and test data using Sklearn library"
]
},
{
"cell_type": "code",
"metadata": {
"id": "R85vpRPKByAC"
},
"source": [
"from sklearn.model_selection import train_test_split\n",
"X1 = df['question1']\n",
"X2 = df['question2']\n",
"y = df['is_duplicate']\n",
"# Using sklearn to split question1 and question2 data into train and test sets in the ratio 80-20 %\n",
"X1_train, X1_test,X2_train, X2_test, y_train, y_test = train_test_split(X1, X2, y, test_size=0.2, random_state=42)\n",
"\n",
"train_q1 = X1_train.tolist()\n",
"train_q1 = np.array(train_q1, dtype=object)[:, np.newaxis]\n",
"train_q2 = X2_train.tolist()\n",
"train_q2 = np.array(train_q2, dtype=object)[:, np.newaxis]\n",
"\n",
"train_labels = np.asarray(pd.get_dummies(y_train), dtype = np.int8)\n",
"\n",
"test_q1 = X1_test.tolist()\n",
"test_q1 = np.array(test_q1, dtype=object)[:, np.newaxis]\n",
"test_q2 = X2_test.tolist()\n",
"test_q2 = np.array(test_q2, dtype=object)[:, np.newaxis]\n",
"\n",
"test_labels = np.asarray(pd.get_dummies(y_test), dtype = np.int8)\n",
"\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "tsqXBt_KvEgH"
},
"source": [
"### Training the model "
]
},
{
"cell_type": "code",
"metadata": {
"id": "SO2GRdB6CVDf"
},
"source": [
"from keras.callbacks import ModelCheckpoint\n",
"\n",
"# Creating the tensorflow session to train the model and save checkpoint after every epoch.\n",
"with tf.Session() as session:\n",
" K.set_session(session)\n",
" session.run(tf.global_variables_initializer())\n",
" session.run(tf.tables_initializer())\n",
" \n",
" filepath=\"drive/My Drive/Colab Notebooks/Northout/model-{epoch:02d}-{val_acc:.2f}.hdf5\"\n",
" checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=True, mode='auto', period=1)\n",
" callbacks_list = [checkpoint]\n",
"\n",
" history = model.fit([train_q1, train_q2], \n",
" train_labels,\n",
" validation_data=([test_q1, test_q2], test_labels),\n",
" epochs=10,\n",
" batch_size=512, callbacks=callbacks_list)\n",
"# model.save('drive/My Drive/Colab Notebooks/Northout/final_model.h5')"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "5duOBYUdvQVh"
},
"source": [
"### Predicting with the model"
]
},
{
"cell_type": "code",
"metadata": {
"id": "bWp3WNTTIH8q",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "afae2bf4-8001-4c15-dfee-36b7c89054b5"
},
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"from keras import backend as K\n",
"q1 = input(\"Type Question 1 here -->\")\n",
"q2 = input(\"Type Question 2 here -->\") \n",
"q1 = np.array([[q1],[q1]])\n",
"q2 = np.array([[q2],[q2]])\n",
"\n",
"\n",
"# Opening a new tensorflow session for embedding the test strings\n",
"with tf.Session() as session:\n",
" K.set_session(session)\n",
" session.run(tf.global_variables_initializer())\n",
" session.run(tf.tables_initializer())\n",
" # Loading the saved weights\n",
" model.load_weights('drive/My Drive/Colab Notebooks/Northout/model-04-0.84.hdf5') \n",
" # Predicting the similarity between the two input questions \n",
" predicts = model.predict([q1, q2], verbose=0)\n",
" predict_logits = predicts.argmax(axis=1)\n",
" print(\"----FINAL RESULT----\")\n",
" if(predict_logits[0] == 1):\n",
" print(\"****Questions are Similar****\")\n",
" else:\n",
" print(\"****Questions are not Similar****\")\n",
" "
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Type Question 1 here -->How old are you?\n",
"Type Question 2 here -->What is your age\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"Exception ignored in: <bound method BaseSession._Callable.__del__ of <tensorflow.python.client.session.BaseSession._Callable object at 0x7f2b6210ec50>>\n",
"Traceback (most recent call last):\n",
" File \"/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py\", line 1455, in __del__\n",
" self._session._session, self._handle, status)\n",
" File \"/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/errors_impl.py\", line 528, in __exit__\n",
" c_api.TF_GetCode(self.status.status))\n",
"tensorflow.python.framework.errors_impl.CancelledError: Session has been closed.\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"----FINAL RESULT----\n",
"****Questions are Similar****\n"
],
"name": "stdout"
}
]
}
]
}
@Lord-V15
Copy link
Author

Changed setting to public gist.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment