Skip to content

Instantly share code, notes, and snippets.

@egenc
Last active February 9, 2023 15:41
Show Gist options
  • Save egenc/beb43c993ca3b53e23e6c27a2a22dfec to your computer and use it in GitHub Desktop.
Save egenc/beb43c993ca3b53e23e6c27a2a22dfec to your computer and use it in GitHub Desktop.
inscribe_task.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/egenc/beb43c993ca3b53e23e6c27a2a22dfec/inscribe_task.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "SMKHXEGBHEfw"
},
"source": [
"# 1. Read Data and Assign Labels"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "iWC1kLOCHyXZ"
},
"outputs": [],
"source": [
"INPUT_1 = \"/content/drive/MyDrive/dataset/companies.txt\"\n",
"INPUT_2 = \"/content/drive/MyDrive/dataset/individuals.txt\"\n",
"\n",
"\n",
"def read_names(path):\n",
"    \"\"\"Return the lines of a UTF-8 text file, one name per entry.\n",
"\n",
"    A trailing newline in the file produces one empty entry; empty entries\n",
"    are handled later in the data-cleaning section.\n",
"    \"\"\"\n",
"    # Explicit encoding so the result does not depend on the platform default.\n",
"    with open(path, \"r\", encoding=\"utf-8\") as handle:\n",
"        return handle.read().split(\"\\n\")\n",
"\n",
"\n",
"data1 = read_names(INPUT_1)  # company names\n",
"data2 = read_names(INPUT_2)  # individual names\n",
"\n",
"# Label convention used in this notebook: 0 = company, 1 = individual.\n",
"d1 = {'input_text': data1, 'label': [0] * len(data1)}\n",
"d2 = {'input_text': data2, 'label': [1] * len(data2)}"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_ou6DLGmHbmg"
},
"source": [
"Checking Companies with label 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 143
},
"id": "fbIi78-XLPQ5",
"outputId": "38656dde-2c74-4035-c79d-9bf73a2154bb"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" input_text label\n",
"0 Albuquerque Dsm Club 0\n",
"1 Teriyaki Hawaii 0\n",
"2 Allure Electrolysis Waxing 0"
],
"text/html": [
"\n",
" <div id=\"df-d97d909d-c00c-4394-8987-a3189ae15e9e\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>input_text</th>\n",
" <th>label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Albuquerque Dsm Club</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Teriyaki Hawaii</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Allure Electrolysis Waxing</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d97d909d-c00c-4394-8987-a3189ae15e9e')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-d97d909d-c00c-4394-8987-a3189ae15e9e button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-d97d909d-c00c-4394-8987-a3189ae15e9e');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 2
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Companies frame: one row per name, every row labelled 0.\n",
"df1 = pd.DataFrame(d1)\n",
"df1.head(n=3)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nW0sWMXIKH4x"
},
"source": [
"## 1.1 Data Cleaning"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_KdFFXsqHrlp"
},
"source": [
"Dropping duplicates"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "o2n0IfyPOq2h",
"outputId": "9c6d140e-c15a-438a-a705-d4bd15844769"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"161056\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"161056"
]
},
"metadata": {},
"execution_count": 3
}
],
"source": [
"# Row count before and after removing exact duplicate (text, label) rows.\n",
"print(df1.shape[0])\n",
"df1 = df1.loc[~df1.duplicated()]\n",
"df1.shape[0]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-4gxa3CZHuYB"
},
"source": [
"Checking Individuals with label 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 143
},
"id": "jFUFMFDfFQVh",
"outputId": "3076c61f-a0c2-4221-e85a-1abd2b03421f"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" input_text label\n",
"0 David Kagan 1\n",
"1 Angel Donchev 1\n",
"2 Erika Parisi 1"
],
"text/html": [
"\n",
" <div id=\"df-badffb3b-39b5-4ecd-9b14-d5cbae3e4975\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>input_text</th>\n",
" <th>label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>David Kagan</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Angel Donchev</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Erika Parisi</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-badffb3b-39b5-4ecd-9b14-d5cbae3e4975')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-badffb3b-39b5-4ecd-9b14-d5cbae3e4975 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-badffb3b-39b5-4ecd-9b14-d5cbae3e4975');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 4
}
],
"source": [
"# Individuals frame: one row per name, every row labelled 1.\n",
"df2 = pd.DataFrame(d2)\n",
"df2.head(n=3)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "10SCFgTNH1tM"
},
"source": [
"Dropping duplicates"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "sN8gR-ltPy9U",
"outputId": "2ab2f3b0-2cce-4678-faca-450b3c99b3c0"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"94247\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"90883"
]
},
"metadata": {},
"execution_count": 5
}
],
"source": [
"# Row count before and after removing exact duplicate (text, label) rows.\n",
"print(df2.shape[0])\n",
"df2 = df2.loc[~df2.duplicated()]\n",
"df2.shape[0]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hF27daSpH6HN"
},
"source": [
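"Concatenating the two dataframes and dropping every name that occurs more than once (`keep=False`), so a name that appears in both files is removed entirely rather than kept with an ambiguous label"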
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "FKqknPOZQAJU",
"outputId": "f6e038ba-1d1f-4ddd-da9e-7f298ef8f921"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"251939\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"251665"
]
},
"metadata": {},
"execution_count": 6
}
],
"source": [
"# Stack companies on top of individuals; the original row labels are kept.\n",
"df = pd.concat([df1, df2])\n",
"\n",
"print(len(df))\n",
"# keep=False marks every occurrence of a repeated name, so names that are\n",
"# present more than once are removed entirely.\n",
"df = df[~df.duplicated(subset=\"input_text\", keep=False)]\n",
"len(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ZgZmHTqTQUp8",
"outputId": "ad0e6445-4c99-49d3-adc9-e3da23b713b9"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" input_text label\n",
"0 Albuquerque Dsm Club 0\n",
"1 Teriyaki Hawaii 0\n",
"2 Allure Electrolysis Waxing 0\n",
"---------------\n",
" input_text label\n",
"94243 Mark Dudas 1\n",
"94244 Parag Dixit 1\n",
"94245 Andrew Battista 1\n"
]
}
],
"source": [
"# Peek at both ends of the combined frame, separated by a dashed rule.\n",
"print(df.head(3), \"-\" * 15, df.tail(3), sep=\"\\n\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "R2E8KqthSShp"
},
"source": [
"Checking NaN values in the DataFrame"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "O-oqh0j8QqLz",
"outputId": "e5b08cc4-672e-4180-f951-742d6ae6cfaa"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0"
]
},
"metadata": {},
"execution_count": 8
}
],
"source": [
"# Total number of missing cells across all columns.\n",
"df.isnull().sum().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "jE-N31IGTRAy"
},
"source": [
"Checking for empty strings: rows whose text is empty or whitespace-only carry no information, add noise, and may hurt model performance.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 49
},
"id": "hnveH8N3RXou",
"outputId": "64f1b44a-c1c0-48bd-95d6-37ae871a74cd"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Empty DataFrame\n",
"Columns: [input_text, label, string_content]\n",
"Index: []"
],
"text/html": [
"\n",
" <div id=\"df-ea237b19-6c0b-4e48-97d0-0d757af7eaf4\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>input_text</th>\n",
" <th>label</th>\n",
" <th>string_content</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ea237b19-6c0b-4e48-97d0-0d757af7eaf4')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-ea237b19-6c0b-4e48-97d0-0d757af7eaf4 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-ea237b19-6c0b-4e48-97d0-0d757af7eaf4');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 9
}
],
"source": [
"def check_empty_strings(s) -> bool:\n",
"    \"\"\"Return True when `s` contains at least one non-whitespace character.\n",
"\n",
"    Note the polarity: despite the name, empty or whitespace-only strings\n",
"    give False.\n",
"    \"\"\"\n",
"    # strip() also catches tabs and other whitespace, not only spaces.\n",
"    return len(s.strip()) > 0\n",
"\n",
"\n",
"# Series.map visits the column directly instead of building a row object\n",
"# for every record, as DataFrame.apply(axis=1) does.\n",
"df[\"string_content\"] = df[\"input_text\"].map(check_empty_strings)\n",
"\n",
"# Rows whose text is empty or whitespace-only.\n",
"df[~df[\"string_content\"]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "4_CGSvD9TC95",
"outputId": "97348251-4438-459c-d388-daf0154c9324"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" input_text label string_content\n",
"0 Albuquerque Dsm Club 0 True\n",
"1 Teriyaki Hawaii 0 True\n",
"2 Allure Electrolysis Waxing 0 True\n",
"3 Los Primos Auto Sales 0 True\n",
"4 John Wesselius 0 True\n",
"... ... ... ...\n",
"94229 Paul Coulombe 1 True\n",
"94230 Vinod Chavan 1 True\n",
"94231 Nishank Chandawala 1 True\n",
"94232 Zachary Butler 1 True\n",
"94233 Dale Briggs 1 True\n",
"\n",
"[251655 rows x 3 columns]"
],
"text/html": [
"\n",
" <div id=\"df-8ca21b9a-e322-40ad-a90e-1cea1e6a7413\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>input_text</th>\n",
" <th>label</th>\n",
" <th>string_content</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Albuquerque Dsm Club</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Teriyaki Hawaii</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Allure Electrolysis Waxing</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Los Primos Auto Sales</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>John Wesselius</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94229</th>\n",
" <td>Paul Coulombe</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94230</th>\n",
" <td>Vinod Chavan</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94231</th>\n",
" <td>Nishank Chandawala</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94232</th>\n",
" <td>Zachary Butler</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94233</th>\n",
" <td>Dale Briggs</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>251655 rows × 3 columns</p>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-8ca21b9a-e322-40ad-a90e-1cea1e6a7413')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-8ca21b9a-e322-40ad-a90e-1cea1e6a7413 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-8ca21b9a-e322-40ad-a90e-1cea1e6a7413');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 10
}
],
"source": [
"# Keep only rows that have real text. A boolean mask is used rather than\n",
"# drop(<index labels>) because the concatenated frame repeats index labels,\n",
"# so dropping by label would also remove unrelated rows sharing a label.\n",
"df = df[df[\"string_content\"]]\n",
"\n",
"df.head(-10)"
]
},
{
"cell_type": "markdown",
"source": [
"The longest string in the data is 32 characters, so there is no need to truncate the inputs."
],
"metadata": {
"id": "c4HbADeewucY"
}
},
{
"cell_type": "code",
"source": [
"# Length, in characters, of the longest name in the cleaned data.\n",
"int(df[\"input_text\"].map(len).max())"
],
"metadata": {
"id": "bx_ZGU9Mwdmx",
"outputId": "bfa77b83-cbcf-415b-8fe2-f9dbcb598517",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"32"
]
},
"metadata": {},
"execution_count": 68
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hOppDSBLIhnu"
},
"source": [
"Lower-casing the input texts and removing punctuation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "Yi5iVFO3NBsF",
"outputId": "334475c1-67c4-4580-80a8-29af1158c671"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" input_text label\n",
"0 albuquerque dsm club 0\n",
"1 teriyaki hawaii 0\n",
"2 allure electrolysis waxing 0\n",
"3 los primos auto sales 0\n",
"4 john wesselius 0\n",
"... ... ...\n",
"94229 paul coulombe 1\n",
"94230 vinod chavan 1\n",
"94231 nishank chandawala 1\n",
"94232 zachary butler 1\n",
"94233 dale briggs 1\n",
"\n",
"[251655 rows x 2 columns]"
],
"text/html": [
"\n",
" <div id=\"df-1ab2fc51-b752-4874-b146-0a4819f31068\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>input_text</th>\n",
" <th>label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>albuquerque dsm club</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>teriyaki hawaii</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>allure electrolysis waxing</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>los primos auto sales</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>john wesselius</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94229</th>\n",
" <td>paul coulombe</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94230</th>\n",
" <td>vinod chavan</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94231</th>\n",
" <td>nishank chandawala</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94232</th>\n",
" <td>zachary butler</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94233</th>\n",
" <td>dale briggs</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>251655 rows × 2 columns</p>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1ab2fc51-b752-4874-b146-0a4819f31068')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-1ab2fc51-b752-4874-b146-0a4819f31068 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-1ab2fc51-b752-4874-b146-0a4819f31068');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 11
}
],
"source": [
"# Lower-case, then strip every character that is neither a word character\n",
"# nor whitespace. This has to go through the .str accessor with regex=True:\n",
"# plain Series.replace('[^\\w\\s]', '') only swaps cells whose entire value\n",
"# equals that literal pattern text, so no punctuation would be removed.\n",
"df[\"input_text\"] = (\n",
"    df[\"input_text\"]\n",
"    .str.lower()\n",
"    .str.replace(r\"[^\\w\\s]\", \"\", regex=True)\n",
")\n",
"# errors=\"ignore\" keeps the cell re-runnable once the helper column is gone.\n",
"df = df.drop(columns=[\"string_content\"], errors=\"ignore\")\n",
"\n",
"df.head(-10)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "TqieoeiMKQGi"
},
"source": [
"# 2. Modelling"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KQjtZ36YIl0m"
},
"source": [
"Models need numerical features rather than raw text, so we convert each name into token counts with **CountVectorizer**. More info: https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "D537uMC-OeTm"
},
"outputs": [],
"source": [
"from sklearn.feature_extraction.text import CountVectorizer\n",
"\n",
"# Bag-of-words encoding: each name becomes a row of token counts.\n",
"# NOTE(review): the vocabulary is learned from every row, including the rows\n",
"# that are later held out as the test split.\n",
"vectorizer = CountVectorizer()\n",
"vectorizer.fit(df[\"input_text\"])\n",
"X = vectorizer.transform(df[\"input_text\"])\n",
"y = df[\"label\"]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Eu7q-BySI2r5"
},
"source": [
"Splitting train and test data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qnrhS68QTNAD"
},
"outputs": [],
"source": [
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out 20% of the rows for evaluation; the fixed seed keeps the split\n",
"# reproducible across runs.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X,\n",
"    y,\n",
"    test_size=0.2,\n",
"    random_state=48,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hwHskT6iKTL1"
},
"source": [
"## 2.1.1 LogisticRegression\n",
"\n",
"For a binary classification task such as this one, logistic regression is a good starting point because it gives us a simple baseline to compare other models against. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pCtUc9jVPN1d",
"outputId": "6eff41c0-e677-4756-fba4-58fcd8b23891"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.8/dist-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
" n_iter_i = _check_optimize_result(\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[27698, 4489],\n",
" [ 2085, 16061]])"
]
},
"metadata": {},
"execution_count": 14
}
],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn import metrics\n",
"\n",
"# With the default iteration limit, lbfgs stopped before converging on this\n",
"# data (ConvergenceWarning), so give the solver a larger iteration budget.\n",
"logreg = LogisticRegression(random_state=16, max_iter=1000)\n",
"\n",
"logreg.fit(X_train, y_train)\n",
"\n",
"y_pred = logreg.predict(X_test)\n",
"\n",
"# Rows are true labels, columns are predicted labels (0 = company, 1 = individual).\n",
"cnf_matrix = metrics.confusion_matrix(y_test, y_pred)\n",
"cnf_matrix"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "BDxTHgpEKjZp"
},
"source": [
"## 2.1.2 Visualizing the results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 343
},
"id": "mWkoY-M-QPd6",
"outputId": "14b15f0f-012b-449a-a85c-00c8c612b0d0"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Text(0.5, 257.44, 'Predicted label')"
]
},
"metadata": {},
"execution_count": 15
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAa4AAAE0CAYAAAB0CNe/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZxWZf3/8dd7BlAUZBMQWVIT90xLcSszNUJNccv1q6Qm5RK5lFsL7mXfXMs0VEq/5kLmQkoikqbmEoQLApYoLiCbDDvINp/fH+cM3vKbGeYeZjvnfj99nMfc93W26yDO2+s617mOIgIzM7OsKGvuCpiZmRXDwWVmZpni4DIzs0xxcJmZWaY4uMzMLFMcXGZmlikOLmvRJLWV9FdJCyX9eQOOc7Kkpxqybs1F0lcl/ae562HWXOTnuKwhSDoJuADYAVgMvAZcExEvbOBxTwF+AOwbEas3uKItnKQA+kbE1Oaui1lL5RaXbTBJFwA3AdcC3YE+wO+AgQ1w+M8B/y2F0KoLSa2auw5mzc3BZRtEUgfgSuCciHg4IpZGxKqI+GtE/DjdZiNJN0n6KF1ukrRRuu4ASdMlXShpjqSZkk5L110B/Bw4XtISSWdIulzSvQXn30pSVP1Cl/QdSe9KWixpmqSTC8pfKNhvX0nj0i7IcZL2LVj3rKSrJP0zPc5Tkjav4fqr6n9RQf2PlHSopP9KqpB0WcH2/SS9JGlBuu1vJbVJ1z2XbvZ6er3HFxz/YkmzgD9UlaX7fD49x5fS71tKmivpgA36F2vWgjm4bEPtA2wMPFLLNj8B9gZ2A74I9AN+WrB+C6AD0BM4A7hVUqeIGErSinswItpFxF21VUTSpsAtwCER0R7Yl6TLct3tOgNPpNt2AW4AnpDUpWCzk4DTgG5AG+BHtZx6C5I/g54kQXsH8D/Al4GvAj+TtHW67RrgfGBzkj+7g4CzASJi/3SbL6bX+2DB8TuTtD4HF544It4BLgbulbQJ8Afg7oh4tpb6mmWag8s2VBfg4/V05Z0MXBkRcyJiLnAFcErB+lXp+lURMQpYAmxfz/pUArtIahsRMyNiUjXbHAa8HRH/FxGrI+J+4C3g8IJt/hAR/42I5cAIktCtySqS+3mrgAdIQunmiFicnn8ySWATEf+OiJfT874H/B74Wh2uaWhErEjr8xkRcQcwFXgF6EHyPwpmueXgsg01D9h8PfdetgTeL/j+flq29hjrBN8yoF2xFYmIpcDxwPeBmZKekLRDHepTVaeeBd9nFVGfeRGxJv1cFSyzC9Yvr9pf0naSHpc0S9IikhZltd2QBeZGxCfr2eYOYBfgNxGxYj3bmmWag8s21EvACuDIWrb5iKSbq0qftKw+lgKbFHzfonBlRIyOiG+QtDzeIvmFvr76VNVpRj3rVIzbSOrVNyI2Ay4DtJ59ah36K6kdyeCYu4DL065Qs9xycNkGiYiFJPd1bk0HJWwiqbWkQyT9Kt3sfuCnkrqmgxx+Dtxb0zHX4zVgf0l90oEhl1atkNRd0sD0XtcKki7HymqOMQrYTtJJklpJOh7YCXi8nnUqRntgEbAkbQ2etc762cA2RR7zZmB8RHyX5N7d7RtcS7MWzMFlGywirid5huunwFzgQ+Bc4NF0k6uB8cAbwERgQlpWn3ONAR5Mj/VvPhs2ZWk9PgIqSO4drRsMRMQ84FvAhSRdnRcB34qIj+tTpyL9iGTgx2KS1uCD66y/HLg7HXV43PoOJmkgMIBPr/MC4EtVoynN8sgPIJuZWaa4xWVmZpni4DIzs0xxcJmZWaY4uMzMLFMcXGZmlikOLms2ktZIek3Sm5L+nM61V99j/VHSsennOyXtVMu2BxROqlvEOd6rbrLdmsrX2WZJkee6XFJt8yOalSwHlzWn5RGxW0TsAqwkmapprfq+wiMivhsRk2vZ5ACSCXjNLIMcXNZSPA9sm7aGnpc0EpgsqV
zS/6avHnlD0vcAlPitpP9IeppkFnfSdc9K2iP9PEDSBEmvSxoraSuSgDw/be19NZ3R4y/pOcZJ2i/dt0v6SpNJku5k/VMzIelRSf9O9xm8zrob0/KxkrqmZZ+X9GS6z/M1zK1oZgX8UjprdmnL6hDgybToS8AuETEt/eW/MCL2VPIOr39KegrYnWQG+Z1IXl45GRi+znG7ksxOsX96rM4RUSHpdmBJRPw63e4+4MaIeEFSH2A0sCMwFHghIq6UdBjJK1fW5/T0HG2BcZL+ks7UsSnJtEznS/p5euxzgWHA9yPibUl7kbyA88B6/DGalQwHlzWntpKq3pf1PMkksfsC/4qIaWl5f2DXqvtXJO/t6gvsD9yfzsr+kaS/V3P8vYHnqo4VERU11ONgYCdpbYNqs3Ti2v2Bo9N9n5A0vw7XNETSUenn3mld55HMmVg1vdO9wMPpOfYF/lxw7o3qcA6zkubgsua0PCI+856r9Bf40sIi4AcRMXqd7Q5twHqUAXuv++qQgjCpEyVvHT4Y2Ccilkl6luQFk9WJ9LwL1v0zMLPa+R6XtXSjgbMktYa177PaFHgOOD69B9YD+Ho1+75MMpP81um+Va/7WEwyS3uVp4AfVH2RVBUkz5FMiIukQ4BO66lrB2B+Glo7kLT4qpQBVa3Gk0i6IBcB0yR9Oz2HJH1xPecwK3kOLmvp7iS5fzVB0pskbwxuBTwCvJ2uu4fkvWCfkb5teTBJt9zrfNpV91fgqKrBGcAQYI908MdkPh3deAVJ8E0i6TL8YD11fRJoJWkK8EuS4KyyFOiXXsOBwJVp+cnAGWn9JgED6/BnYlbSPDu8mZlliltcZmaWKQ4uMzPLlBY7qrBtnxPdh2lNavY7dXlMy6xhbdb64OKGr65Hsb87l39wf4Oevym4xWVmZpnSYltcZmZWPCn/7REHl5lZjqgEOtIcXGZmOeIWl5mZZYqDy8zMMqXYOTazyMFlZpYrbnGZmVmGuKvQzMwyxcFlZmaZ4uHwZmaWKW5xmZlZpji4zMwsUxxcZmaWKcLPcZmZWYa4xWVmZplSVpb/X+v5v0Izs5LiFpeZmWWIuwrNzCxTHFxmZpYpnjnDzMwyxS0uMzPLlFJ4H1f+o9nMrIRIZUUt6z+eekt6RtJkSZMk/TAtv1zSDEmvpcuhBftcKmmqpP9I+mZB+YC0bKqkSwrKt5b0Slr+oKQ2tdXJwWVmliOirKilDlYDF0bETsDewDmSdkrX3RgRu6XLKIB03QnAzsAA4HeSyiWVA7cChwA7AScWHOe69FjbAvOBM2qrkIPLzCxHGrrFFREzI2JC+nkxMAXoWcsuA4EHImJFREwDpgL90mVqRLwbESuBB4CBSvo2DwQeSve/Gziytjo5uMzMcqShg+uzx9ZWwO7AK2nRuZLekDRcUqe0rCfwYcFu09Oymsq7AAsiYvU65TVycJmZ5UixXYWSBksaX7AMrva4UjvgL8B5EbEIuA34PLAbMBO4vqmu0aMKzczypMhWVEQMA4bVekipNUlo/SkiHk73m12w/g7g8fTrDKB3we690jJqKJ8HdJTUKm11FW5fLbe4zMxypBFGFQq4C5gSETcUlPco2Owo4M3080jgBEkbSdoa6Av8CxgH9E1HELYhGcAxMiICeAY4Nt1/EPBYbXVyi8vMLEca4Tmu/YBTgImSXkvLLiMZFbgbEMB7wPcAImKSpBHAZJIRiedExJq0bucCo4FyYHhETEqPdzHwgKSrgVdJgrJGDi4zsxxp6CmfIuIFqPbtlKNq2eca4JpqykdVt19EvEsy6rBOHFxmZjniKZ/MzCxbSmDKJweXmVme5L/B5eAyM8sVt7jMzCxTHFxmZpYp7io0M7MsCbe4zMwsU/KfWw4uM7NcKct/cjm4zMzyxF2FZmaWKfnPLQeXmVmuuKvQzMwyxV2FZmaWKfnPLQeXmVmuuKvQzMwyJf+55eAyM8sTz5xhZmbZ4q5CMzPLlPznloPLzCxX3F
VoZmaZ4q5CMzPLlPznloPLzCxXyvL/JkkHl5lZnuQ/txxcZma54sEZZmaWKfnPLQeXmVmehEcVWnPp1aMzd954Nt26diACht83lluHP8n/3TqEvtv0AKDjZpuyYNFS9j7kUgB22aEPv/3FGbRvvwmVlZV85fCfsmLFKo49fG8uOvcoysvL+NvYCfz0F/cD0HvLLtxxw1l02GxTysvL+Nkv72f0M6812zVby7NmTSWnHn8d3bp15MbfnbW2/NfXjmDkIy/x3LgbAZg1s4LLL7uHxYuXU7mmknPPH8h+++/CqlWrufaK+5ky6QPKJC685Fi+3G+75rqc0uCuQmsuq9dUcsnV9/Lam+/RbtONefGJaxn7/EROOeeWtdv88qf/w8LFywAoLy9j+M3ncMZ5tzJxygd07tiOVatW07ljO6697GT2PewyPq5YzB03nMUB++3Ms/+cxMVDjuIvj7/MHfc+zQ59e/LoHy9mh/2GNNclWwv0wL3PsPU2W7B0ySdryya/+T6LFi37zHZ3/f5JDv7mlzj2hP15952ZnHfW7xj51C488tA/k+M88hMq5i3mh2fdyt0PXERZCYx8azb5z61SGH+STbPmLOC1N98DYMnST3hr6gy23KLzZ7Y55lt7M+KxFwE4eP9deXPKB0yc8gEAFQuWUFkZbN2nG1Pfm8XHFYsB+PsLEznykL0AiAg2a98WgA7tN2Hm7PlNcWmWEbNnzeeF595k4DH7ri1bs6aSW65/hCEXHvWZbSVYujQJtyWLl7N51w4ATHtnFnumLazOXdrTrn1bpkz6oImuoESVqbglgxqtxSVpB2Ag0DMtmgGMjIgpjXXOvOrTa3N223krxr06dW3Zfv12YPbHC3nnvVkA9N2mB0Ew8v8uYfPOm/HQX1/ihtv/yjvvz2a7bXrQp9fmzJhZwRH996B1m+Rf+zU3/oW/3nspZ33nm2yyyUYcdtK1zXJ91jLdcN1DDLngKJYt/bS1NeK+f7D/13ddG0xVBp99GOcO/i0j7vsHy5ev4NY7kpZ73+178tyzE+l/6B7MnjWftyZ/yOxZ89n5C1s15aWUlhLoKmyUFpeki4EHSBqt/0oXAfdLuqSW/QZLGi9p/OolU2varKRsuslG3P/78/nxFfeweMnyteXHDdyXP6etLYBW5WXsu8f2nDbkVg465nKO+OYeHLDfzixYuJQhPxnOvbf+kLEPDeX96R9TuaYyOcYR+3Lvn59j273O5ahBv+Kum85GJfCX3tbv+Wcn0qlze3bcuc/asrlzFjD2qQkcd9LX/r/tR48az7cG7sUTY6/hpt+dzdBL76ayspIjjtqHbt07curx13HDdQ+x625bu5uwsanIJYMaq8V1BrBzRKwqLJR0AzAJ+GV1O0XEMGAYQNs+J0Yj1S0zWrUq5/7fn8+Dj/yTx54ct7a8vLyMgQP6sd9hl60tmzGzghf+9Rbz5iddgk8+8xq777I1z/5zEqOensCopycAcPpJB7KmMgmuQSd8nYGn/AKAVya8zcYbtWbzzu2ZO29RU12itVCvv/ouzz87kRefn8SKFatYuvQTjj/yalq3bsXRh14OwCefrOKoQ4byyN+u4LGHX+SW288FYNfdtmHFylUsmL+Uzl3ac8HFx6497ukn/5o+W3VrjksqHRnt/itGY/2vTyWwZTXlPdJ1Vge3/+9g/jP1I265c9Rnyg/8yhf47zsfMWNWxdqyMc+9wc7b96btxm0oLy/jq3vvyJS3ZwDQtctmAHTssCmDT/kGf7j/7wB8OONjDthvFwC233ZLNt6ojUPLADj3/IE8MfYaRj51Fdf+7+ns2W97/v7irxn9j18y8qmrGPnUVWy8cWse+dsVAGzRozPjXnkLSO5rrVyxmk6d2/HJ8pUsX7YCgFdenEKrVmVs8/kezXZdJcH3uOrtPGCspLeBD9OyPsC2wLmNdM5c2XfP7Tn5mP2ZOOUDXv5b0ioa+qsHGf3Ma3z7iH0YMfLFz2y/YOFSbr
lzFC88fg0RwehnXuPJv78KwK8vH8QXdkq6fH5x08NMnZbcF7vk6nv53XVn8oPvHkpEcOYFtzXhFVqenPfjo7lm6H3cf88zIBh69SlIoqJiMT/43m8pk+javSNX/GJQc1c19yKbWVQURTROj5ykMqAfnx2cMS4i1tRlf3cVWlOb/c4ZzV0FK0GbtT64QaNmm8EPFfW7891hx2Yu6hptVGFEVAIvN9bxzcysGiUwwMoPIJuZ5UlG71sVw8FlZpYnJfC0gYPLzCxPSqCrsASy2cyshDTwcHhJvSU9I2mypEmSfpiWd5Y0RtLb6c9Oabkk3SJpqqQ3JH2p4FiD0u3fljSooPzLkiam+9yi9cyE4OAyM8uRkIpa6mA1cGFE7ATsDZwjaSfgEmBsRPQFxqbfAQ4B+qbLYOA2SIIOGArsRTLifGhV2KXbnFmw34DaKuTgMjPLk7Iil/WIiJkRMSH9vBiYQvKY00Dg7nSzu4Ej088DgXsi8TLQUVIP4JvAmIioiIj5wBhgQLpus4h4OZLns+4pOFaNl2hmZnlRZFdh4Ryx6TK4pkNL2grYHXgF6B4RM9NVs4Du6eeefDrxBMD0tKy28unVlNfIgzPMzPKkyMEZhXPE1n5YtQP+ApwXEYsKb0NFREhqskkj3OIyM8uTRpirUFJrktD6U0Q8nBbPTrv5SH/OSctnAL0Ldu+VltVW3qua8povsU61NjOzbGjg15qkI/zuAqZExA0Fq0YCVSMDBwGPFZSfmo4u3BtYmHYpjgb6S+qUDsroD4xO1y2StHd6rlMLjlUtdxWameVINPzMGfsBpwATJb2Wll1G8nqqEZLOAN4HjkvXjQIOBaYCy4DTACKiQtJVQNU7mq6MiKpXXJwN/BFoC/wtXWrk4DIzy5MGDq6IeIGa22YHVbN9AOfUcKzhwPBqyscDu9S1Tg4uM7M8KYGZMxxcZmZ5UgIjFxxcZmZ54haXmZllil9rYmZmmeLgMjOzLKnjxLmZ5uAyM8sTD84wM7NMcYvLzMwyxfe4zMwsUxxcZmaWKfnPLQeXmVmeRHn+R2c4uMzM8sRdhWZmlin5zy0Hl5lZnpTlv6fQwWVmlicl8BiXg8vMLE9KOrgkLQai6mv6M9LPERGbNXLdzMysSCqB5KoxuCKifVNWxMzMNlwJ5FbdpmOU9BVJp6WfN5e0deNWy8zM6kMqbsmi9d7jkjQU2APYHvgD0Aa4F9ivcatmZmbFkkcVAnAUsDswASAiPpLkbkQzsxYoq62oYtQluFZGREgKAEmbNnKdzMysnkpg4ow63eMaIen3QEdJZwJPA3c0brXMzKw+fI8LiIhfS/oGsAjYDvh5RIxp9JqZmVnRshpGxajrA8gTgbYkz3FNbLzqmJnZhiiF57jW21Uo6bvAv4CjgWOBlyWd3tgVMzOz4qmsuCWL6tLi+jGwe0TMA5DUBXgRGN6YFTMzs+KVQIOrTsE1D1hc8H1xWmZmZi1MSQeXpAvSj1OBVyQ9RnKPayDwRhPUzczMilTSwQVUPWT8TrpUeazxqmNmZhuiFJ7jqm2S3SuasiJmZrbhSr3FBYCkrsBFwM7AxlXlEXFgI9bLzMzqoRSCqy6DIf8EvAVsDVwBvAeMa8Q6mZlZPalMRS1ZVJfg6hIRdwGrIuIfEXE64NaWmVkL5CmfEqvSnzMlHQZ8BHRuvCqZmVl9ZTWMilGX4LpaUgfgQuA3wGbA+Y1aKzMzqxcHFxARj6cfFwJfb9zqmJnZhsjobaui1PYA8m9IHjiuVkQMaZQamZlZvZV6i2t8k9XCzMwaRFYnzi1GbQ8g392UFTEzsw3X0C0uScOBbwFzImKXtOxy4ExgbrrZZRExKl13KXAGsAYYEhGj0/IBwM1AOXBnRPwyLd8aeADoAvwbOCUiVtZWpxLIZjOz0iGpqKUO/ggMqKb8xojYLV2qQmsn4ASSCSsGAL+TVC6pHLgVOATYCTgx3RbguvRY2wLzSUKvVg4uM7McaejnuCLiOa
CijqcfCDwQESsiYhrJJO390mVqRLybtqYeAAYqSc4DgYfS/e8GjlzfSRxcZmY5UmxwSRosaXzBMriOpzpX0huShkvqlJb1BD4s2GZ6WlZTeRdgQUSsXqe8Vi12VOHyDzzHrzWtPUfMae4qWAkad1zDHq/Ye1wRMQwYVuRpbgOuIsmIq4DrgdOLPEa9eVShmVmONMVzXBExu+qzpDuAqud9ZwC9CzbtlZZRQ/k8oKOkVmmrq3D7GnlUoZlZjjRFcEnqEREz069HAW+mn0cC90m6AdgS6Av8CxDQNx1BOINkAMdJERGSngGOJbnvNYg6vPOxrq81uZhkJIhfa2Jm1oKVqcY7PPUi6X7gAGBzSdOBocABknYj6Sp8D/geQERMkjQCmAysBs6JiDXpcc4FRpMMhx8eEZPSU1wMPCDpauBV4K711akucxX+CXgQOAz4Pkkizq11DzMzaxatGrjFFREnVlNcY7hExDXANdWUjwJGVVP+Lsmowzrza03MzHKkTFHUkkV+rYmZWY6U9CS7BfxaEzOzjCiFh3P9WhMzsxxxiwuQ9AeqeRA5vddlZmYtiDJ636oYdekqfLzg88YkY/Y/apzqmJnZhnCLC4iIvxR+T8f0v9BoNTIzs3rzPa7q9QW6NXRFzMxsw2V1iHsx6nKPazGfvcc1i+RJZzMza2HcVQhERPumqIiZmW24UugqXO81ShpblzIzM2t+ZSpuyaLa3se1MbAJycSKnUhm94XkAeT1vujLzMyaXqnf4/oecB7J1PT/5tPgWgT8tpHrZWZm9ZDVVlQxansf183AzZJ+EBG/acI6mZlZPfkeV6JSUseqL5I6STq7EetkZmb1VAqzw9cluM6MiAVVXyJiPnBm41XJzMzqq6QHZxQol6SICABJ5UCbxq2WmZnVR1bDqBh1Ca4ngQcl/T79/r20zMzMWphSuMdVl+C6GBgMnJV+HwPc0Wg1MjOzesvqfatirDecI6IyIm6PiGMj4lhgMskLJc3MrIXxPa6UpN2BE4HjgGnAw41ZKTMzq5+S7iqUtB1JWJ0IfAw8CCgi/BZkM7MWKqutqGLU1uJ6C3ge+FZETAWQdH6T1MrMzOqlFN6AXFur8mhgJvCMpDskHcSn0z6ZmVkLVAr3uGoMroh4NCJOAHYAniGZt7CbpNsk9W+qCpqZWd2VFblkUV1GFS6NiPsi4nCgF/AqfpGkmVmLVApTPtVpVGGVdLqnYeliZmYtTFa7/4pRVHCZmVnL5uAyM7NMKW/uCjQBB5eZWY5k9b5VMRxcZmY54q5CMzPLFAeXmZllSrmDy8zMssQtLjMzyxQPzjAzs0xxi8vMzDLFz3GZmVmmtCpzV6GZmWWIRxWamVmmlMI9rqy+jsXMzKrR0C+SlDRc0hxJbxaUdZY0RtLb6c9Oabkk3SJpqqQ3JH2pYJ9B6fZvSxpUUP5lSRPTfW6RtN5aObjMzHKkEd6A/EdgwDpllwBjI6IvMDb9DnAI0DddBgO3QRJ0wFBgL6AfMLQq7NJtzizYb91z/f/XWKdqm5lZJpQrilrWJyKeAyrWKR4I3J1+vhs4sqD8nki8DHSU1AP4JjAmIirS9zqOAQak6zaLiJcjIoB7Co5VIweXmVmOlBW5SBosaXzBMrgOp+keETPTz7OA7unnnsCHBdtNT8tqK59eTXmtPDjDzCxHih2cEREb9Fb7iAipaafrcIvLzCxHGuEeV3Vmp918pD/npOUzgN4F2/VKy2or71VNee3XWO9qm5lZi9PQ97hqMBKoGhk4CHisoPzUdHTh3sDCtEtxNNBfUqd0UEZ/YHS6bpGkvdPRhKcWHKtG7io0M8uRhn6OS9L9wAHA5pKmk4wO/CUwQtIZwPvAcenmo4BDganAMuA0gIiokHQVMC7d7sqIqBrwcTbJyMW2wN/SpVYOLjOzHGno4IqIE2tYdVA12wZwTg3HGQ4Mr6Z8PLBLMXVycJmZ5UgpzJzh4DIzyxHPVWhmZpniF0mamVmmlM
JQcQdXBsycOZeLLrqRefMWIMFxxw1g0KAjWLBgMeef/ytmzJhNz57duemmi+nQoR2LFy/lxz++no8+msuaNWs4/fSjOeaYgwHYcceBbLfd5wDo0aMrt9/+s+a8NGtBfrZnX77SoxPzV6zihNGvri0/btsefHvbHlRG8MLM+fzmjfcA+M4OvThi6+5URvDrV9/l5dkLAGjXupyf7tGXz3fYhACuGvc2E+ct5qBeXRi8cx+22mwTvvP060yZv6QZrjL/fI/LWoTy8nIuueR0dt55W5YsWcYxx5zPfvvtxsMPj2WffXZl8OBvM2zYnxk27CF+/OPv8Kc/PcHnP9+H22//ORUVCxkw4PscfvjXaNOmNRtv3IbHHruluS/JWqDHp81mxNsfccVe260t+3LXDnytZxdOeupVVlUGnTZqDcDWm7XlG326cvzoCXRt24Zbv7YLx/zt31QGXLj7Nrw0az6XvPQWrcrExuVJG+Cdhcu46MW3uPTL2zbL9ZWKUrjHVQqtyszr1q0zO++c/Mfert0mbLNNb2bPnsfYsa9w5JHJiNQjjzyIp59+GQBJLF26jIhg6dLldOjQnlatSuGF3rYhXv14EYtWrv5M2THbbsHdUz5kVWVy32T+ilUAfG3LLoz5YC6rKoOPlq7gwyWfsHPn9mzaupzdN+/AY9NmA7C6Mliyag0A7y1ezvuLlzfhFZWmMkVRSxa5xZUx06fPZsqUd/jiF7dn3rwFdOvWGYCuXTsxb17SVXPyyYdx1llX89WvDmLp0uXceONFlJUl/4+yYsVKjj76fFq1Kmfw4GM4+OB9mu1arOX7XLu27Na1A2d9YStWrqnk5tenMXn+Erq2bcOb8xav3W7OshV0bduGFWsqWbBiFUP37EvfjpsyZf4Srn/1XT5ZU9mMV1FaSqGrsMlbXJJOq2Xd2lmKhw17sCmrlQlLly5nyJBfcNllZ9Ku3SafWSeJqtevvfDCq+y449Y8//zdPProzVx55e0sWbIMgGeeGc7DD9/I9df/iGuvvZMPPpi57mnM1iovE5u1acVpY1/n5jemce0+O9S+vcT2ndrx0Dsz+Z8xr/HJ6kq+s2OvWvexhtVEcxU2q+boKryiphURMSwi9oiIPQYPPjcka94AAAawSURBVL4p69TirVq1miFDfsHhhx9A//77AtClS0fmzElmTZkzp4LOnTsC8PDDT9O//75I4nOf25Jevbbg3XeTNwd0794FgN69t6Bfv12YPPndZrgay4o5y1byzPR5AEyuWEIQdNyoFXOXr6T7Jhut3a7bJhsxd/lK5ixfwZzlK5hUkQy8GDv9Y7bv2K5Z6l6qin2tSRY1Sr3TVzZXt0zk0/e2WB1FBD/5yS1ss01vTjvt03esHXhgPx59dCwAjz46loMO2gtIRgu+9NLrAHz88XymTZtOr17dWbhwCStXJvcoKioWMmHCFLbdtjdmNXn2o3ns0a0DAH3abUzrsjIWrFjNcx9V8I0+XWldJrbcdCP6tGvLpIrFzPtkFbOXreBz7dsCsGf3jkxbtKw5L6HkSMUtWaRkaqkGPqg0m+SNl/PXXQW8GBFbrv8o/83mXcNGMH78JE4++RK2224rytK2/QUXnMquu27Heeddx8yZc9lyy27cdNPFdOzYntmz53HppTcxd+58IoIzzzyWgQO/zoQJUxg69FYkERGceuoRfPvb/Zv56lqOPUfMWf9GOXb13tvz5a4d6LhRK+Z9sophkz5g1Ptz+Pmefdmu46asqgxufn0a4+csBOC0HZPh8Gsqgxtem8aLs5L/3LfruCk/2WNbWpeVMWPpJ1z5r/+yeNUaDujZhR/tvg2dNmrN4lWr+e+CpQx5blJzXnKLMO64rzRofIyb+0RRvzv37HpY5uKrsYLrLuAPEfFCNevui4iT1n8UB5c1rVIPLmseDR1c4z8uLrj22Dx7wdUoowoj4oxa1tUhtMzMrD6yet+qGB4Ob2aWI8ros1nFcHCZmeVI5vr96s
HBZWaWI1kdKVgMB5eZWY6UQG45uMzM8iSrs2EUw8FlZpYjJZBbDi4zszzxPS4zM8uUEsgtB5eZWZ44uMzMLFM8OMPMzDKlBHLLwWVmliee8snMzDLFXYVmZpYpnh3ezMwyxc9xmZlZppRAbjm4zMzyxC0uMzPLlBLILQeXmVmeeFShmZllSgnkloPLzCxP/ACymZlliltcZmaWKR5VaGZmmVICueXgMjPLk1KY8qkUrtHMrGRIxS11O6bekzRR0muSxqdlnSWNkfR2+rNTWi5Jt0iaKukNSV8qOM6gdPu3JQ2q7zU6uMzMckVFLnX29YjYLSL2SL9fAoyNiL7A2PQ7wCFA33QZDNwGSdABQ4G9gH7A0KqwK5aDy8wsR1TkPxtgIHB3+vlu4MiC8nsi8TLQUVIP4JvAmIioiIj5wBhgQH1O7OAyM8sRqazIRYMljS9YBldz2ACekvTvgvXdI2Jm+nkW0D393BP4sGDf6WlZTeVF8+AMM7NcKa4VFRHDgGHr2ewrETFDUjdgjKS31jlGqAmffHaLy8wsRxqjqzAiZqQ/5wCPkNyjmp12AZL+nJNuPgPoXbB7r7SspvKiObjMzHKlYQdnSNpUUvuqz0B/4E1gJFA1MnAQ8Fj6eSRwajq6cG9gYdqlOBroL6lTOiijf1pWNHcVmpnliNTg7ZHuwCNKxs63Au6LiCcljQNGSDoDeB84Lt1+FHAoMBVYBpwGEBEVkq4CxqXbXRkRFfWpkIPLzCxXGnbujIh4F/hiNeXzgIOqKQ/gnBqONRwYvqF1cnCZmeXIBg5xzwQHl5lZjji4zMwsY/I/5s7BZWaWIyqB95o4uMzMcsXBZWZmGeJ7XGZmljG+x2VmZhniFpeZmWWKB2eYmVnGOLjMzCxD5HtcZmaWLW5xmZlZhvgel5mZZYyDy8zMMsT3uMzMLGPc4jIzswwpa/g3ILc4Di4zs1xxcJmZWYZ4yiczM8sYB5eZmWWIn+MyM7OM8T0uMzPLkFK4x6WIaO46WAOTNDgihjV3Pax0+O+cNaX8tylL0+DmroCVHP+dsybj4DIzs0xxcJmZWaY4uPLJ9xqsqfnvnDUZD84wM7NMcYvLzMwyxcFlZmaZ4uDKEUkDJP1H0lRJlzR3fSz/JA2XNEfSm81dFysdDq6ckFQO3AocAuwEnChpp+atlZWAPwIDmrsSVlocXPnRD5gaEe9GxErgAWBgM9fJci4ingMqmrseVlocXPnRE/iw4Pv0tMzMLFccXGZmlikOrvyYAfQu+N4rLTMzyxUHV36MA/pK2lpSG+AEYGQz18nMrME5uHIiIlYD5wKjgSnAiIiY1Ly1sryTdD/wErC9pOmSzmjuOln+econMzPLFLe4zMwsUxxcZmaWKQ4uMzPLFAeXmZllioPLzMwyxcFlZmaZ4uAyM7NM+X9du4lFjCcAHQAAAABJRU5ErkJggg==\n"
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"class_names=[0,1] # referring to COMPANY & CUSTOMER\n",
"fig, ax = plt.subplots()\n",
"tick_marks = np.arange(len(class_names))\n",
"plt.xticks(tick_marks, class_names)\n",
"plt.yticks(tick_marks, class_names)\n",
"sns.heatmap(pd.DataFrame(cnf_matrix), annot=True, cmap=\"YlGnBu\" ,fmt='g')\n",
"ax.xaxis.set_label_position(\"top\")\n",
"plt.tight_layout()\n",
"plt.title('Confusion matrix', y=1.1)\n",
"plt.ylabel('Actual label')\n",
"plt.xlabel('Predicted label')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "qpVickbeE2CO"
},
"source": [
"The weighted-averaged F1 score is calculated by taking the mean of all per-class F1 scores while considering each class's support. Support refers to the number of actual occurrences of the class in the dataset.\n",
"\n",
"Therefore, we will take into account the **weighted F1 score**."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "4prS4Tq1D2zD",
"outputId": "f1f3acfd-3a86-4815-d6cd-9540c65a1d9b"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.8709135822543531"
]
},
"metadata": {},
"execution_count": 16
}
],
"source": [
"from sklearn.metrics import f1_score\n",
"\n",
"f1_score(y_test, y_pred, average='weighted')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZQYUP9--JIHo"
},
"source": [
"Hash map for labels and their names"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TPJQaMNLGoyv"
},
"outputs": [],
"source": [
"res_dict = {0:\"COMPANY\", 1:\"CUSTOMER\"}"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "9_cLos1RKwzq"
},
"source": [
"## 2.1.3 Printing wrong results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "0swhleR_FN8j",
"outputId": "a72d1e9d-b16d-408f-8bbb-411902f7916e"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[array(['leonard', 'cassert'], dtype='<U31')] has been classified as COMPANY and should be CUSTOMER\n",
"[array(['shelby', 'byler'], dtype='<U31')] has been classified as CUSTOMER and should be COMPANY\n",
"[array(['brien', 'keane'], dtype='<U31')] has been classified as CUSTOMER and should be COMPANY\n",
"[array(['melinda', 'sacks'], dtype='<U31')] has been classified as COMPANY and should be CUSTOMER\n",
"[array(['lance', 'leland', 'provencher'], dtype='<U31')] has been classified as CUSTOMER and should be COMPANY\n"
]
}
],
"source": [
"count = 0\n",
"for input, prediction, label in zip(X_test, y_pred, y_test):\n",
" if count == 5: break\n",
" elif prediction != label:\n",
" count += 1\n",
" print(vectorizer.inverse_transform(input), 'has been classified as ', res_dict[prediction], 'and should be ', res_dict[label])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UHpPEhjRJ9Y7"
},
"source": [
"Getting output feature names for transformation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "us9hzZG-bc_W",
"outputId": "38d4aa50-992d-4ab7-bf6b-9d3e5d5e3bbe"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array(['00', '000', '00142', ..., 'zywicki', 'zywien', 'zzzs'],\n",
" dtype=object)"
]
},
"metadata": {},
"execution_count": 19
}
],
"source": [
"vectorizer.get_feature_names_out()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "mZPSW-t1K10A"
},
"source": [
"## 2.1.4 Inference on User Inputs"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7-0iWS87K6l0"
},
"source": [
"Inputs need to be normalized in the way we normalized our train and test data.\n",
"- Removing punctuations\n",
"- Lowering text"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "ltMSk-7pCsYo",
"outputId": "f9a05d7b-c72e-4356-f03e-8d51564fe27b"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'!\"#$%&\\'()*+,-./:;<=>?@[\\\\]^_`{|}~'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 20
}
],
"source": [
"import string\n",
"\n",
"string.punctuation"
]
},
{
"cell_type": "markdown",
"source": [
"Following code is preprocessing the custom inputs coming from users.\n",
"It:\n",
"- removes punctuations (above)\n",
"- lowers the text\n",
"- Raise error if length is longer than 32 chars."
],
"metadata": {
"id": "kw-XstrPwER0"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JgcDTA9ZcVrE",
"outputId": "fcb29eec-777c-4cd0-c762-f57d5581574c"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Input text: inscribeai \n",
" Prediction COMPANY\n",
"----------\n",
"Input text: string that \n",
" Prediction COMPANY\n",
"----------\n",
"Input text: erdal genc \n",
" Prediction CUSTOMER\n",
"----------\n",
"Input text: oisín moran \n",
" Prediction CUSTOMER\n",
"----------\n",
"Input text: jp morgan \n",
" Prediction COMPANY\n",
"----------\n",
"Input text: paramount sp \n",
" Prediction COMPANY\n",
"----------\n"
]
}
],
"source": [
"def preprocess_input(List) -> list:\n",
"\n",
" List = [s.lower() for s in List]\n",
" List = [s.translate(str.maketrans('', '', string.punctuation)) for s in List]\n",
" for s in List:\n",
" if len(s) > 32:\n",
" raise Exception(\"One or more strings are longer than 32 chars\")\n",
" return List\n",
"\n",
"def predictor(name_l) -> None:\n",
" name_l = preprocess_input(name_l)\n",
"\n",
" tmp = vectorizer.transform(name_l)\n",
" preds = logreg.predict(tmp)\n",
" \n",
" res = [res_dict[ele] for ele in preds]\n",
" for item in zip(name_l, res):\n",
" print(\"Input text:\", item[0], \"\\n\", \"Prediction\", item[1])\n",
" print(\"-\"*10)\n",
"\n",
"s1 = \"inscribeAI ~\"\n",
"s2 = \"string that\"\n",
"s3 = \"Erdal Genc\"\n",
"s4 = \"Oisín Moran\"\n",
"s5 = \"jp morgan\"\n",
"s6 = \"paramount S&P\"\n",
"\n",
"predictor([s1, s2, s3, s4, s5, s6])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "d6yvryDNLMzl"
},
"source": [
"## 2.2.1 Random Forest\n",
"\n",
"This is a tree based algorithm which may result better predictions (from my experience, tree based models are quite effective on tabular data). Therefore, second approach will be Random Forest. For more:\n",
"https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ee2yaBhBDpDJ",
"outputId": "57249a36-d0ab-44af-eb3d-514f66ef7200"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[28011, 4176],\n",
" [ 4262, 13884]])"
]
},
"metadata": {},
"execution_count": 22
}
],
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"\n",
"# Setting random_state so that we can get same results (consistency in terms of producing same results)\n",
"RFCclf = RandomForestClassifier(n_estimators=50, bootstrap=False, random_state=48)\n",
"\n",
"RFCclf.fit(X_train, y_train)\n",
"\n",
"y_pred = RFCclf.predict(X_test)\n",
"\n",
"cnf_matrix = metrics.confusion_matrix(y_test, y_pred)\n",
"cnf_matrix"
]
},
{
"cell_type": "code",
"source": [
"class_names=[0,1] # referring to COMPANY & CUSTOMER\n",
"fig, ax = plt.subplots()\n",
"tick_marks = np.arange(len(class_names))\n",
"plt.xticks(tick_marks, class_names)\n",
"plt.yticks(tick_marks, class_names)\n",
"sns.heatmap(pd.DataFrame(cnf_matrix), annot=True, cmap=\"YlGnBu\" ,fmt='g')\n",
"ax.xaxis.set_label_position(\"top\")\n",
"plt.tight_layout()\n",
"plt.title('Confusion matrix', y=1.1)\n",
"plt.ylabel('Actual label')\n",
"plt.xlabel('Predicted label')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 343
},
"id": "CeluwNGWoj1X",
"outputId": "95827939-d61f-4aae-9666-b3b89c342768"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Text(0.5, 257.44, 'Predicted label')"
]
},
"metadata": {},
"execution_count": 23
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAa4AAAE0CAYAAAB0CNe/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3debxVVf3/8df7gsqkwgUFEacQVCRFJSLNORHNJIevpv6SjMK+aqbigFohJtq3LM0GY5CU/OZQmuCQSgxftURBVAScrooKMiigjCLg5/fH2RePdKdzudPe5/18PPaDc9Zee+21r3g/rGGvpYjAzMwsLUoauwJmZmaFcOAyM7NUceAyM7NUceAyM7NUceAyM7NUceAyM7NUceCyJk1SS0kPSvpI0l+3oJyzJD1el3VrLJIOlfRqY9fDrLHI73FZXZB0JnAJsDewEngBGBERT21hud8GfggcHBEbtriiTZykALpFRFlj18WsqXKLy7aYpEuAm4HrgY7ArsAfgAF1UPxuwGvFELRqQlLzxq6DWWNz4LItIml74Frg/Ii4PyJWR8T6iHgwIi5L8mwj6WZJ7yXHzZK2Sc4dIWm+pCGSlkhaKOmc5Nxw4KfA6ZJWSRok6RpJd+bdf3dJUf4LXdJ3JL0paaWktySdlZf+VN51B0uannRBTpd0cN65qZJ+JulfSTmPS+pQyfOX1//yvPp/U9Lxkl6TtEzSVXn5+0h6WtKHSd7fSdo6OfdEku3F5HlPzyv/CkmLgD+VpyXXdE3ucWDyvbOk9yUdsUX/Yc2aMAcu21JfAVoAf68iz9VAX6AXsD/QB/hx3vlOwPbAzsAg4PeS2kXEMHKtuHsiok1E3FZVRSS1Bm4BjouIbYGDyXVZbp6vFHg4ydse+DXwsKT2ednOBM4BdgS2Bi6t4tadyP0MdiYXaEcD/w84CDgU+ImkPZK8G4GLgQ7kfnZHA+cBRMRhSZ79k+e9J6/8UnKtz8H5N46IN4ArgDsltQL+BNwREVOrqK9Zqjlw2ZZqD3xQTVfeWcC1EbEkIt4HhgPfzju/Pjm/PiIeAVYBe9WyPp8CPSW1jIiFETGngjxfB16PiD9HxIaIuAt4BfhGXp4/RcRrEbEWuJdc0K3MenLjeeuBu8kFpd9ExMrk/nPJBWwi4rmImJbcdx4wEji8Bs80LCLWJfX5nIgYDZQBzwA7kfuHgllmOXDZlloKdKhm7KUz8Hbe97eTtE1lbBb41gBtCq1IRKwGTgd+ACyU9LCkvWtQn/I67Zz3fVEB9VkaERuTz+WBZXHe+bXl10vqLukhSYskrSDXoqywGzLP+xHxcTV5RgM9gd9GxLpq8pqlmgOXbamngXXAN6vI8x65bq5yuyZptbEaaJX3vVP+yYh4LCKOIdfyeIXcL/Tq6lNepwW1rFMhbiVXr24RsR1wFaBqrqly6q+kNuQmx9wGXJN0hZpllgOXbZGI+IjcuM7vk0kJrSRtJek4Sb9Ist0F/FjSDskkh58Cd1ZWZjVeAA6TtGsyMeTK8hOSOkoakIx1rSPX5fhpBWU8AnSXdKak5pJOB3oAD9WyToXYFlgBrEpag/+92fnFwBcKLPM3wIyI+B65sbs/bnEtzZowBy7bYhHxK3LvcP0YeB94F7gAeCDJch0wA5gFvATMTNJqc6+JwD1JWc/x+WBTktTjPWAZubGjzQMDEbEUOAEYQq6r83LghIj4oDZ1KtCl5CZ+rCTXGrxns/PXAHcksw5Pq64wSQOA/nz2nJcAB5bPpjTLIr+AbGZmqeIWl5mZpYoDl5mZpYoDl5mZpYoDl5mZpYoDl5mZpYoDlzUaSRslvSBptqS/Jmvt1bas2yWdmnweI6lHFXmPyF9Ut4B7zKtosd3K0jfLs6rAe10jqar1Ec2KlgOXNaa1EdErInoCn5BbqmmT2m7hERHfi4i5VWQ5gtwCvGaWQg5c1lQ8CeyZtIaelDQBmC
upmaRfJluPzJJ0LoByfifpVUn/JLeKO8m5qZJ6J5/7S5op6UVJkyTtTi5AXpy09g5NVvS4L7nHdEmHJNe2T7Y0mSNpDNUvzYSkByQ9l1wzeLNzNyXpkyTtkKR1lfRocs2TlaytaGZ5vCmdNbqkZXUc8GiSdCDQMyLeSn75fxQRX1JuD69/SXocOIDcCvI9yG1eORcYu1m5O5BbneKwpKzSiFgm6Y/Aqoi4Mcn3F+CmiHhK0q7AY8A+wDDgqYi4VtLXyW25Up3vJvdoCUyXdF+yUkdrcssyXSzpp0nZFwCjgB9ExOuSvkxuA86javFjNCsaDlzWmFpKKt8v60lyi8QeDDwbEW8l6f2A/crHr8jt29UNOAy4K1mV/T1Jkysovy/wRHlZEbGsknp8DeghbWpQbZcsXHsYcHJy7cOSltfgmS6UdFLyeZekrkvJrZlYvrzTncD9yT0OBv6ad+9tanAPs6LmwGWNaW1EfG6fq+QX+Or8JOCHEfHYZvmOr8N6lAB9N986JC+Y1Ihyuw5/DfhKRKyRNJXcBpMVieS+H27+MzCzqnmMy5q6x4D/lrQVbNrPqjXwBHB6Mga2E3BkBddOI7eS/B7JteXbfawkt0p7uceBH5Z/kVQeSJ4gtyAuko4D2lVT1+2B5UnQ2ptci69cCVDeajyTXBfkCuAtSf+V3EOS9q/mHmZFz4HLmrox5MavZkqaTW7H4ObA34HXk3PjyO0L9jnJbsuDyXXLvchnXXUPAieVT84ALgR6J5M/5vLZ7Mbh5ALfHHJdhu9UU9dHgeaSXgZ+Ti5wllsN9Eme4Sjg2iT9LGBQUr85wIAa/EzMippXhzczs1Rxi8vMzFLFgcvMzFKlyc4qbLnrGe7DtAa19p3hjV0FK0rdC5u+Wo1Cf3eufeeuOr1/Q3CLy8zMUqXJtrjMzKxwUvbbIw5cZmYZoiLoSHPgMjPLELe4zMwsVRy4zMwsVQpdYzONHLjMzDLFLS4zM0sRdxWamVmqOHCZmVmqeDq8mZmliltcZmaWKg5cZmaWKg5cZmaWKsLvcZmZWYq4xWVmZqlSUpL9X+vZf0Izs6LiFpeZmaWIuwrNzCxVHLjMzCxVvHKGmZmliltcZmaWKt6Py8zMUsUtLjMzSxWPcZmZWaq4xWVmZqniwGVmZqnirkIzM0uXImhxZf8JzcyKiFRS0FF9edpF0hRJcyXNkfSjJP0aSQskvZAcx+ddc6WkMkmvSjo2L71/klYmaWhe+h6SnknS75G0dVV1cuAyM8sQSQUdNbABGBIRPYC+wPmSeiTnboqIXsnxSHL/HsC3gH2B/sAfJDWT1Az4PXAc0AM4I6+c/0nK2hNYDgyqqkIOXGZmGSJKCjqqExELI2Jm8nkl8DKwcxWXDADujoh1EfEWUAb0SY6yiHgzIj4B7gYGKBc9jwL+llx/B/DNqurkwGVmliGFdhVKGixpRt4xuPKytTtwAPBMknSBpFmSxkpql6TtDLybd9n8JK2y9PbAhxGxYbP0SjlwmZlliVTQERGjIqJ33jGq4mLVBrgPuCgiVgC3Al2BXsBC4FcN9YieVWhmliX10ByRtBW5oPW/EXE/QEQszjs/Gngo+boA2CXv8i5JGpWkLwXaSmqetLry81fILS4zsywpsMVVfXEScBvwckT8Oi99p7xsJwGzk88TgG9J2kbSHkA34FlgOtAtmUG4NbkJHBMiIoApwKnJ9QOB8VXVyS0uM7MsqfvV4Q8Bvg28JOmFJO0qcrMCewEBzAPOBYiIOZLuBeaSm5F4fkRszFVNFwCPAc2AsRExJynvCuBuSdcBz5MLlJVy4DIzy5I67keLiKeAiqLhI1VcMwIYUUH6IxVdFxFvkpt1WCMOXGZmGRLej8vMzFIl+3HLgcvMLFNKsh+5HLjMzLLEXYVmZpYq2Y9bDlxmZpnirkIzM0sVdxWamVmqZD9uOXCZmWWKuwrNzCxVsh+3HLjMzLLEK2eYmVm6uKvQzMxSJftxy4HLzC
xT3FVoZmap4q5CMzNLlezHLQcuM7NMKanjnSSbIAcuM7MsyX7ccuAyM8sUT84wM7NUyX7ccuAyM8uS8KxCayxddiplzE3nseMO2xMBY/8yid+PfZT9euzGb68fxDbbbMWGjZ9y0dVjmfHiGwD8avhAjj2yF2vWfsLgIbfywux5AIwfN5Q+B+zJv2e8yinn/HLTPX4wsB8XDDqOrrt3osv+g1m6fGVjPKo1YRs3buSUUy6hY8dSRo4cxp13PsQdd0zgnXcW8vTTd1Jauj0AY8bcz4MPTt10zRtvzOfpp++kbdttWbFiFT/+8W957bW3kcT11/+IAw7YuxGfKuPcVWiNZcPGTxl63Z28MHsebVq34N8PX8+kJ19ixFVnMuLm+3h86osce2QvRlx1Jsee/jOOPbIXXXfvRM/DLqbPAXtyy4hBHDbgJwDcNPJBWrXchkFnHf25ezw94zUemTSTx+/5aWM8oqXAuHEP0rVrF1atWgPAgQfuwxFHfImzz77qc/m+972T+d73TgZg8uRnuf328bRtuy0AI0aM5tBDD+SWW67kk0/W8/HH6xr2IYpN9uNWMcw/SadFSz7c1GJatfpjXilbQOdOpUQE223bEoDtt23FwsXLATih30H85b4nAXj2+TK2364VnXZsC8DUf81h5aq1/3GPF+fM4535HzTA01gaLVr0AVOnTufUU/ttSuvRoytdunSs8rqHH/4/TjjhMABWrlzN9OmzN5Wx9dZbsd12beqv0pZ7AbmQI4XqrcUlaW9gALBzkrQAmBARL9fXPbNq1y4d6LXv7kx/vozLho/jwT9fyQ1X/z9KSsSRJw0DoHOnUuYvXLrpmgWLltG5UymLlnzYWNW2lLv++tFcdtk5rF79n//oqczatR/z5JMz+clPfgDA/PmLKS3dniuvvJlXXpnHvvt25eqrB9OqVYv6qrYVQVdhvbS4JF0B3E2u0fpscgi4S9LQKq4bLGmGpBkbVpXVR9VSp3Wrbbhr5MVcNnwcK1etZfC3j+Hya/9Mt74XcPm1f+bWXw5u7CpaBk2Z8iylpdvTs+eeBV43nQMP3GdTN+GGDRuZO/cNzjjjeB544De0bNmCUaP+Vh9VtnIq8Eih+uoqHAR8KSJ+HhF3JsfPgT7JuQpFxKiI6B0RvZu3Kex/mCxq3rwZd428mHv+/i/GPzodgLNOOYwH/vEsAPc9NI3e+3cF4L1Fy+iyU/tN1+7cqZT3Fi1r+EpbJsyc+TKTJz/LUUcN4pJLfsG0abO49NJfVXvdww8/wde/ftim7506daBTpw7sv/9eAPTvfwhz575Rb/U2iqKrsL4C16dA5wrSd0rOWQ388ZeDebXsPW4Z88imtIWLl3No330AOOKQfSmbtwiAhyfO5MxTDgWgzwF7smLlGncTWq0NGTKQJ564ncmTb+PXv76cvn3348Ybh1R5Tfl41tFH992UtsMO7ejUqQNvvjkfgKeffpGuXXep17oXvSIIXPU1xnURMEnS68C7SdquwJ7ABfV0z0w5+Et7cdYph/HSy+8w7R83ADDsF/dw/tDR/PKas2nerBnr1q3ngqFjAHh08vMce2Qv5jx5M2vWruPcS0duKuuffxtG966dadO6BWXP/I4fXDaKfz4xi/POOZZLfvANOu7QlumP/w+PTn6e864Y3SjPa+kwbtwExoy5nw8+WM6JJ17I4YcfxIgRFwIwceLTHHLIAf8xfvWTn5zLpZf+ivXrN7DLLh254YaLGqPqRSPSGYsKooion4KlEnJdg/mTM6ZHxMaaXN9y1zPqp2JmlVj7zvDGroIVpe51Gmq+MPhvBf3ufHPUqakLdfU2qzAiPgWm1Vf5ZmZWgSKYVegXkM3MsiSl41aFcOAyM8uSIlhWwoHLzCxL3FVoZmap4q5CMzNLk3CLy8zMUsVjXGZmliruKjQzs1RxV6GZmaWKW1xmZpYq2Y9bDlxmZlkSbnGZmVmqFEHgKoKJk2ZmRUQq7Ki2OO0iaYqkuZ
LmSPpRkl4qaaKk15M/2yXpknSLpDJJsyQdmFfWwCT/65IG5qUfJOml5JpbpKor5sBlZpYlJQUe1dsADImIHkBf4HxJPYChwKSI6AZMSr4DHAd0S47BwK2QC3TAMODL5La8GlYe7JI838+7rn91j2hmZllRxy2uiFgYETOTzyuBl8ntszgAuCPJdgfwzeTzAGBc5EwD2kraCTgWmBgRyyJiOTAR6J+c2y4ipkVug8hxeWVVyIHLzCxLSlTQIWmwpBl5x+DKipa0O3AA8AzQMSIWJqcWAR2Tzzvz2c73APOTtKrS51eQXilPzjAzy5ICJ2dExChgVHX5JLUB7gMuiogV+cNQERGSGmzXere4zMwyJKSCjpqQtBW5oPW/EXF/krw46eYj+XNJkr4A2CXv8i5JWlXpXSpIr5QDl5lZltTx5Ixkht9twMsR8eu8UxOA8pmBA4HxeelnJ7ML+wIfJV2KjwH9JLVLJmX0Ax5Lzq2Q1De519l5ZVXIXYVmZllS92sVHgJ8G3hJ0gtJ2lXAz4F7JQ0C3gZOS849AhwPlAFrgHMAImKZpJ8B05N810bEsuTzecDtQEvgH8lRKQcuM7MsqeMXkCPiKSpfSOroCvIHcH4lZY0FxlaQPgPoWdM6OXCZmWVJEayc4cBlZpYl2Y9bDlxmZlkSzbI/586By8wsS9xVaGZmqZL9uOXAZWaWJSXZ7yl04DIzy5K6f42r6XHgMjPLkKIOXJJWAuWLJpb/KCL5HBGxXT3XzczMClTNHoyZUGngiohtG7IiZma25YogbtVskV1JX5V0TvK5g6Q96rdaZmZWG3W8j2STVO0Yl6RhQG9gL+BPwNbAneQWXjQzsyZEnlUIwEnkdrws37r5PUnuRjQza4LS2ooqRE0C1yf5u1tKal3PdTIzs1oqgoUzajTGda+kkUBbSd8H/gmMrt9qmZlZbXiMC4iIGyUdA6wAugM/jYiJ9V4zMzMrWFqDUSFq+gLyS+R2pozks5mZNUHF8B5XtV2Fkr4HPAucDJwKTJP03fqumJmZFU4lhR1pVJMW12XAARGxFEBSe+DfVLD9spmZNa4iaHDVKHAtBVbmfV+ZpJmZWRNT1IFL0iXJxzLgGUnjyY1xDQBmNUDdzMysQEUduIDyl4zfSI5y4+uvOmZmtiWK4T2uqhbZHd6QFTEzsy1X7C0uACTtAFwO7Au0KE+PiKPqsV5mZlYLxRC4ajIZ8n+BV4A9gOHAPGB6PdbJzMxqSSUq6EijmgSu9hFxG7A+Iv4vIr4LuLVlZtYEecmnnPXJnwslfR14DyitvyqZmVltpTUYFaImges6SdsDQ4DfAtsBF9drrczMrFYcuICIeCj5+BFwZP1Wx8zMtkRKh60KUtULyL8l98JxhSLiwnqpkZmZ1Vqxt7hmNFgtzMysTqR14dxCVPUC8h0NWREzM9tyxd7iMjOzlCmG/bgcuMzMMqQI4pYDl5lZlhR14GrsWYVr3/Eav9awLpo2v7GrYEXo5r7d67S8og5ceFahmVnqFPV7XJ5VaGaWPkUduMol25pcAfTA25qYmTVpJap0hCczarqtyct4WxMzsyavuQo70sjbmpiZZUiJoqAjjbytiZlZhniMK8fbmpiZpUQRLFVY/TNGxEMR8VFEzI6IIyPioIiY0BCVMzOzwpSosKM6ksZKWiJpdl7aNZIWSHohOY7PO3elpDJJr0o6Ni+9f5JWJmloXvoekp5J0u+RtHV1darJrMI/UcGLyMlYl5mZNSGq+3Gr24HfAeM2S78pIm78/L3VA/gWsC/QGfinpPI3rH8PHAPMB6ZLmhARc4H/Scq6W9IfgUHArVVVqCZdhQ/lfW4BnERunMvMzJqYuh7jiognJO1ew+wDgLsjYh3wlqQyoE9yriwi3gSQdDcwQNLL5Cb7nZnkuQO4hi0NXBFxX/53SXcBT9XwIczMrAE14BjXBZLOJrfK0pCIWA7sDEzLyzM/SQN4d7P0LwPtgQ8jYkMF+StVm2
fsBuxYi+vMzKyeFTodXtJgSTPyjsE1uM2tQFegF7AQ+FW9PtRmajLGtZLPj3EtIreShpmZNTGFdhVGxChgVIHXLC7/LGk0nw0pLQB2ycvaJUmjkvSlQFtJzZNWV37+StWkq3Db6vKYmVnT0BBdhZJ2ioiFydeTgPIZhxOAv0j6NbnJGd2AZwEB3STtQS4wfQs4MyJC0hTgVOBuYCAwvrr716TFNSkijq4uzczMGl9dT85I5jUcAXSQNB8YBhwhqRe53rh5wLkAETFH0r3AXGADcH5EbEzKuQB4DGgGjI2IOcktrgDulnQd8DxwW3V1qmo/rhZAq6Sy7chFTMi9gFzt4JmZmTW8ul7GKSLOqCC50uASESOAERWkPwI8UkH6m3w287BGqmpxnQtcRK659xyfBa4V5Ob0m5lZE1PUSz5FxG+A30j6YUT8tgHrZGZmteQln3I+ldS2/IukdpLOq8c6mZlZLRXD6vA1CVzfj4gPy78kL5l9v/6qZGZmtVXXaxU2RTVZ8qmZJEVEAEhqBlS7CKKZmTW8tAajQtQkcD0K3CNpZPL93CTNzMyamGIY46pJ4LoCGAz8d/J9IjC63mpkZma1ltZxq0LUZD+uTyPijxFxakScSu7FMs8yNDNrgjzGlZB0AHAGcBrwFnB/fVbKzMxqp6i7CpPNv85Ijg+AewBFxJENVDczMytQWltRhaiqxfUK8CRwQkSUAUi6uEFqZWZmtVIPOyA3OVW1Kk8mt8/KFEmjJR3NZ8s+mZlZE1QMY1yVBq6IeCAivgXsDUwht27hjpJuldSvoSpoZmY1V1LgkUY1mVW4OiL+EhHfILfJ1/N4I0kzsyapGJZ8qtGswnLJck8F75ZpZmYNI63df4UoKHCZmVnT5sBlZmap0qyxK9AAHLjMzDIkreNWhXDgMjPLEHcVmplZqjhwmZlZqjRz4DIzszRxi8vMzFLFkzPMzCxV3OIyM7NU8XtcZmaWKs1L3FVoZmYp4lmFZmaWKh7jMjOzVHHgMjOzVHHgMjOzVGnm97jMzCxNqt3WPgMcuMzMMsRdhWZmlioOXGZmlioe4zIzs1Rxi8vMzFLFgcvMzFLFgcvMzFLFaxWamVmqeCNJMzNLFb+AbE3Kxo0bOeWUS+jYsZSRI4cxZMiNzJ5dxlZbNeOLX+zOtdeez1Zb5f6TPvPMS1x//Wg2bNhAu3bbceedP2fhwve5/PKbWLr0QyQ47bT+DBx4YiM/lTUls8aM4/0XXmLr7bbl0Ot/CsBr901gycxZUCK23nZb9vv+2bRo15b1a9by4sg/8fHSZcTGT9njuK/R5bCDAXjlnvt5/4XZRAQdeu7NPmedhvRZH9ZzN/2BNe9/sOkeVnc8xmVNyrhxD9K1axdWrVoDwIknHsGNNw4BYMiQG/nrXx/nzDOPZ8WKVQwffitjxlxD5847snTphwA0a9aMoUO/y7777smqVWs45ZSLOeSQXuy5566N9kzWtHT56lfY7WtHMGvU7ZvS9jj+GLqfkvsHzrzHJ1M2/hF6fudM3pk0lTadd6L3xeexbsVKnhx6DZ0P7sNHb73N8tfe4KsjfgzAtOtuZNkrr9N+n+4ALJrxPM1abNPgz1YsimGMqxhalZmwaNEHTJ06nVNP7bcp7fDDeyMJSey3XzcWL/4AgAcf/D+OOeYrdO68IwDt27cFYMcdS9l33z0BaNOmFV/4wi4sXry0gZ/EmrLSvbuxVevWn0vbqmXLTZ83rvsk74zY+PHHRAQb161jq9atUUkJSHy6fj2fbtjAp+s38OnGjWyz/bYAbPj4Y+Y9OomuJx7fEI9TlEoUBR1p5MCVEtdfP5rLLjuHkpL//E+2fv0Gxo+fwqGHHgTAvHnvsWLFKr797Ss5+eSLeOCByf9xzfz5i3n55TfYf/+96r3uln6v/W08Uy6+iveefpZuJ38DgN2+dgSr3lvElB8N5amrr2Ofs/4LlZTQbs8v0H6fvZj8o6FM/tEV7PDFHrTpvBMAr9
/3ILv3/xrNtt66MR8n00pU2FEdSWMlLZE0Oy+tVNJESa8nf7ZL0iXpFkllkmZJOjDvmoFJ/tclDcxLP0jSS8k1tyi/T7myZyz0h7KlJJ1TxbnBkmZImjFq1D0NWa0mbcqUZykt3Z6ePfes8Pzw4bfSu3dPevfeF8iNhc2Z8wYjRw5jzJjh/OEPd/PWWws25V+9ei0XXngDV131fdq0adUgz2Dp1v3UARx50/V0/kof3vnnVADenz2X7XbtwpG/+TmH/Owq5v75HtavXcvqxUtYtXARR950PUfefANL577KsldfZ8Xb77Jmyft06t2rcR8m4+o6cAG3A/03SxsKTIqIbsCk5DvAcUC35BgM3Aq5QAcMA74M9AGGlQe7JM/3867b/F7/+Yw1qnbdGl7ZiYgYFRG9I6L34MGnN2SdmrSZM19m8uRnOeqoQVxyyS+YNm0Wl176KwB+97u7WLbsI668ctCm/J06teerXz2AVq1aUFq6Pb179+SVV94Ccq2zCy+8gW984wj69Tu4UZ7H0qvzwX1YNON5ABY8+TQde/dCEq077kjLHdqz+r3FLH7uBdp23YPmLVrQvEULOuy3Lx+WvcXysjf5aN47TB1yNdNG3MjqRUt45oZfN/ITZU9JgUd1IuIJYNlmyQOAO5LPdwDfzEsfFznTgLaSdgKOBSZGxLKIWA5MBPon57aLiGkREcC4vLIqVS+TMyTNquwU0LE+7pllQ4YMZMiQXMv6mWdeYuzY+7nxxiH89a+P8dRTM7n99us+14V49NF9ufbaP7Jhw0bWr1/PrFmv8p3vDCAiuPrqW/jCF3bhnHOq/bthBsDqRUto3Sk3Xrp45ou03qkTAC1K27F07quU7tWNdR+tYPXCxbTasQNrP/iAd6c+xacnHAsBy199nd36HUXHA/Zjt6MPB2DN+0t57qbf8+UrL2m058qq6jvaNs+vweRaR+VGRcSoai7rGBELk8+L+Oz3+s7Au3n55idpVaXPryC9SvU1q7AjuQi7fLN0Af+up3sWnWHD/kDnzjty+umXAXDMMV/hggvOoGvXXTj00IM48cQfUlIiTj21H92778aMGXMYP34K3bvvzoABFwJwySVnc/jhvRvzMawJeeEPt7HslXdQoL0AAAWsSURBVNf4ZNUqJl90Jd1OOoH3Z81m9cLFSCW06FBKz4FnArDngOOZNXocT179M4hgr9NOYutt29DpSweydO6rPHX1dSDY4Yv70vGA/Rr5yYpHoZMKkyBVXaCq6vqQGnaWh3KtszouVLoN+FNEPFXBub9ExJnVl/JaOqe7WGpdNG1+9ZnM6tjNfY+q0wnsMz54uKDfnb07fL3a+0vaHXgoInom318FjoiIhUl339SI2EvSyOTzXfn5yo+IODdJHwlMTY4pEbF3kn5Gfr7K1MsYV0QMqihoJedqELTMzKw26nqMqxITgPKZgQOB8XnpZyezC/sCHyVdio8B/SS1SyZl9AMeS86tkNQ3mU14dl5ZlfILyGZmGVLXvXaS7iLXYuogaT652YE/B+6VNAh4Gzgtyf4IcDxQBqwBzgGIiGWSfgZMT/JdGxHlEz7OIzdzsSXwj+SokgOXmVmG1PXCGRFxRiWnjq4gbwDnV1LOWGBsBekzgJ6F1MmBy8wsQwqdVZhGDlxmZhlSBHHLgcvMLEu8OryZmaVKEcQtBy4zsyzxGJeZmaVKEcQtBy4zsyxx4DIzs1Tx5AwzM0uVIohbDlxmZlnSwAu1NwoHLjOzDHFXoZmZpUpjbGvf0By4zMwyxO9xmZlZqhRB3HLgMjPLEre4zMwsVYogbjlwmZlliWcVmplZqhRB3HLgMjPLEr+AbGZmqeIWl5mZpYpnFZqZWaoUQdxy4DIzyxIv+WRmZqnirkIzM0uZ7EcuBy4zswyRA5eZmaWJlP1RLgcuM7NMcYvLzMxSxF2FZmaWMg5cZmaWIh7jMjOzlHGLy8zMUsRjXGZmlioOXGZmljIe4zIzsxRRESxW6M
BlZpYpDlxmZpYiHuMyM7OU8RiXmZmliFtcZmaWKp6cYWZmKePAZWZmKaIiGOPK/hOamRUVFXjUoERpnqSXJL0gaUaSVippoqTXkz/bJemSdIukMkmzJB2YV87AJP/rkgbW9gkduMzMMkRSQUcBjoyIXhHRO/k+FJgUEd2AScl3gOOAbskxGLg1qVcpMAz4MtAHGFYe7ArlwGVmlil13+KqxADgjuTzHcA389LHRc40oK2knYBjgYkRsSwilgMTgf61ubEDl5lZhoiSwg5psKQZecfgCooN4HFJz+Wd7xgRC5PPi4COyeedgXfzrp2fpFWWXjBPzjAzy5TCWlERMQoYVU22r0bEAkk7AhMlvbJZGSEpCqtn7bnFZWaWISUqKeioiYhYkPy5BPg7uTGqxUkXIMmfS5LsC4Bd8i7vkqRVll74M9bmIjMza6pKCjyqJqm1pG3LPwP9gNnABKB8ZuBAYHzyeQJwdjK7sC/wUdKl+BjQT1K7ZFJGvyStYO4qNDPLkHpY8qkj8PdkBmJz4C8R8aik6cC9kgYBbwOnJfkfAY4HyoA1wDkAEbFM0s+A6Um+ayNiWW0q5MBlZpYpdRu4IuJNYP8K0pcCR1eQHsD5lZQ1Fhi7pXVy4DIzyxCvVWhmZimT/akLDlxmZhlSDNuaKNcdaVkiaXDyboZZg/DfOWtI2W9TFqeK3nw3q0/+O2cNxoHLzMxSxYHLzMxSxYErmzzWYA3Nf+eswXhyhpmZpYpbXGZmlioOXGZmlioOXBkiqb+kVyWVSRpa/RVmW0bSWElLJM1u7LpY8XDgyghJzYDfA8cBPYAzJPVo3FpZEbidWm6/blZbDlzZ0Qcoi4g3I+IT4G5gQCPXyTIuIp4AarU1hVltOXBlx87Au3nf5ydpZmaZ4sBlZmap4sCVHQuAXfK+d0nSzMwyxYErO6YD3STtIWlr4FvAhEauk5lZnXPgyoiI2ABcADwGvAzcGxFzGrdWlnWS7gKeBvaSNF/SoMauk2Wfl3wyM7NUcYvLzMxSxYHLzMxSxYHLzMxSxYHLzMxSxYHLzMxSxYHLzMxSxYHLzMxS5f8DWpAjGVXHbssAAAAASUVORK5CYII=\n"
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"source": [
"f1_score(y_test, y_pred, average='weighted')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uf6LVw9PooyI",
"outputId": "5dc401c6-79cc-4c97-8b14-7e859c7ec5cb"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.83226923581755"
]
},
"metadata": {},
"execution_count": 24
}
]
},
{
"cell_type": "code",
"source": [
"# Print the first MAX_EXAMPLES misclassified samples next to their predicted and true labels.\n",
"MAX_EXAMPLES = 5\n",
"count = 0\n",
"# The loop variable is `features` rather than `input` so the builtin input() is not shadowed\n",
"# for the rest of the kernel session.\n",
"for features, prediction, label in zip(X_test, y_pred, y_test):\n",
"  if count == MAX_EXAMPLES:\n",
"    break\n",
"  if prediction != label:\n",
"    count += 1\n",
"    print(vectorizer.inverse_transform(features), 'has been classified as ', res_dict[prediction], 'and should be ', res_dict[label])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9QgM-HLFpvbH",
"outputId": "a72f9dc9-e72f-4e2f-8eae-7ab4b2b244b3"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[array(['cassert', 'leonard'], dtype='<U31')] has been classified as COMPANY and should be CUSTOMER\n",
"[array(['byler', 'shelby'], dtype='<U31')] has been classified as CUSTOMER and should be COMPANY\n",
"[array(['hall', 'joy'], dtype='<U31')] has been classified as COMPANY and should be CUSTOMER\n",
"[array(['gunter', 'unruh'], dtype='<U31')] has been classified as CUSTOMER and should be COMPANY\n",
"[array(['brien', 'keane'], dtype='<U31')] has been classified as CUSTOMER and should be COMPANY\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## 2.3.1 SBert\n",
"\n",
"SentenceTransformers is a Python framework for state-of-the-art sentence and text embeddings."
],
"metadata": {
"id": "zIP1Ca5ys6m7"
}
},
{
"cell_type": "code",
"source": [
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -q torch transformers evaluate memory_profiler datasets"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "dFSl_yRyRY7x",
"outputId": "e5953d67-baa2-4a81-8f9c-aa3bdd51682f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/81.4 KB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 KB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Split the text column and its labels 80/20 with a fixed seed.\n",
"X, y = df[\"input_text\"], df[\"label\"]\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=48\n",
")"
],
"metadata": {
"id": "jI9FXDJQ1lzQ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## 2.3.2 Preparing Dataset for HuggingFace Dataset Class for Compatibility"
],
"metadata": {
"id": "QmklXAGrTq8y"
}
},
{
"cell_type": "code",
"source": [
"import datasets\n",
"import pandas as pd\n",
"\n",
"\n",
"# Wrap each split in a DataFrame with the \"text\" / \"label\" column names\n",
"# that the tokenization cell reads.\n",
"train_df = pd.DataFrame({\n",
"    \"text\" : X_train,\n",
"    \"label\" : y_train\n",
"})\n",
"\n",
"test_df = pd.DataFrame({\n",
"    \"text\" : X_test,\n",
"    \"label\" : y_test\n",
"})\n",
"\n",
"# Reference Dataset through the imported `datasets` module so this cell does not rely on a\n",
"# separate `Dataset` name being in scope. preserve_index=False keeps the pandas index from\n",
"# being stored as an extra column, so the features stay ['text', 'label'].\n",
"train_dataset = datasets.Dataset.from_pandas(train_df, preserve_index=False)\n",
"test_dataset = datasets.Dataset.from_pandas(test_df, preserve_index=False)\n",
"my_dataset_dict = datasets.DatasetDict({\"train\":train_dataset,\"test\":test_dataset})"
],
"metadata": {
"id": "F1VxkExFJNeR"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"my_dataset_dict"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "f43vP9lML3DO",
"outputId": "040cec9c-b1b3-4f59-8e9c-24a39aac6e03"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"DatasetDict({\n",
" train: Dataset({\n",
" features: ['text', 'label'],\n",
" num_rows: 201332\n",
" })\n",
" test: Dataset({\n",
" features: ['text', 'label'],\n",
" num_rows: 50333\n",
" })\n",
"})"
]
},
"metadata": {},
"execution_count": 58
}
]
},
{
"cell_type": "markdown",
"source": [
"## 2.3.3 Using Sentence Embeddings\n",
"\n",
"Each entry in the dataset consists of multiple tokens, so using sentence embeddings instead of token counts is a better approach. In addition, BERT (a transformer model) keeps positional information about the tokens.\n",
"Source: https://towardsdatascience.com/sentence-embedding-3053db22ea77"
],
"metadata": {
"id": "jm9RdHtRUPQj"
}
},
{
"cell_type": "code",
"source": [
"import torch\n",
"import random\n",
"from transformers.file_utils import is_tf_available, is_torch_available, is_torch_tpu_available\n",
"from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments\n",
"from datasets import load_metric\n",
"from sklearn.model_selection import train_test_split\n",
"import pandas as pd\n",
"import numpy as np\n",
"%load_ext memory_profiler\n",
"\n",
"# Call the Tokenizer\n",
"# NOTE(review): the classifier loaded further down is distilbert-base-uncased, and the training log\n",
"# reports that the token_type_ids produced here are ignored - confirm this checkpoint pairing is intended.\n",
"tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/bert-base-nli-mean-tokens', do_lower_case=True)\n",
"\n",
"# This tokenizer has no predefined maximum length, so `truncation=True` on its own only triggers a\n",
"# warning and no truncation happens. Cap sequences explicitly; 512 is the position limit of\n",
"# BERT-base style models.\n",
"MAX_SEQ_LENGTH = 512\n",
"\n",
"def preprocess_function(examples):\n",
"    \"\"\"Tokenize the \"text\" column of a batch, truncating to MAX_SEQ_LENGTH tokens.\"\"\"\n",
"    return tokenizer(examples[\"text\"], truncation=True, max_length=MAX_SEQ_LENGTH)\n",
"\n",
"tokenized_dataset = my_dataset_dict.map(preprocess_function, batched=True)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 155,
"referenced_widgets": [
"9c4fb2d129444963aea420c854eef3fc",
"3b5cacca64fd493585821976258161ea",
"23189cd56a1241a0a380908881212b80",
"5c195943784a4148879a2eba852de73e",
"96c47c1a7a56495b8769b2abc91c7554",
"c267747bc9eb4f479725973e6f5a45e3",
"506588deea78460ea457fcfe500e6573",
"840a8c9797104868a9de57f861911ebd",
"1975d008d30a4ca3886ee5df896b7761",
"80925e9024c0420094607c2c6acb6bdc",
"b080984034b946ed82c2a980142bf692",
"1f6f87f4e0ef4832860c44a70eb1e4b0",
"c5a6012a4db54497aef8b5b504299e18",
"e2433a191b9d439e97194e46c8ef911b",
"5d6825da704e4a669ab5e65025aab34a",
"f1e91c961e804fc4952c0e01821bcfc6",
"5a7feaa348e948c9a49128440581575e",
"516791f2b3c44db289647cf13406ab3a",
"fd29542280ee4e93b98cc6435bac95cc",
"a6318f96b86e4f78b7b83dbdb06650e3",
"3b94b9df7c9d4e2e84bafab06d4c996e",
"9b41efcc8b6e4fb598528a9471c2ce09"
]
},
"id": "HnGRUESts420",
"outputId": "6f46ca75-181e-41a6-e18e-4deecb897023"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"The memory_profiler extension is already loaded. To reload it, use:\n",
" %reload_ext memory_profiler\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/202 [00:00<?, ?ba/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9c4fb2d129444963aea420c854eef3fc"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/51 [00:00<?, ?ba/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "1f6f87f4e0ef4832860c44a70eb1e4b0"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"from transformers import DataCollatorWithPadding\n",
"\n",
"data_collator = DataCollatorWithPadding(tokenizer=tokenizer)"
],
"metadata": {
"id": "1s54-PQcMU_R"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Using F1 score"
],
"metadata": {
"id": "-1OVZ3cJVEMt"
}
},
{
"cell_type": "code",
"source": [
"import evaluate\n",
"\n",
"f1_metric = evaluate.load(\"f1\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 49,
"referenced_widgets": [
"2927debb8f1a4f8896e8e3e64672aa3b",
"ee01f994e4884ee289af2076c2418f9c",
"25c4ae34365547afbf0757d7a1983eb9",
"3272acbdf75f4dc5a54058638cd5b3ca",
"6884a271eac1481ca360bc792d61ae48",
"000629234cfe477b971b14ca9cec62bb",
"88cc1d67e50d4539b50334f8661e1910",
"962f9d867fd844c8b4810bf394821f96",
"aeb7ffe019804fb28c64cd406dc1f118",
"c3dbae8b12c64860a2fb9332dd105144",
"661a6a170eb64681816b2ba3d0d664b2"
]
},
"id": "Fh8PG7woONNi",
"outputId": "e01f4d5f-3de2-4669-e461-25bca888600c"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading builder script: 0%| | 0.00/6.77k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "2927debb8f1a4f8896e8e3e64672aa3b"
}
},
"metadata": {}
}
]
},
{
"cell_type": "markdown",
"source": [
"## 2.3.4 Pytorch as Backend\n",
"\n",
"We return tensors with PyTorch since it is slightly easier to read. HuggingFace also supports TensorFlow for returning tensors."
],
"metadata": {
"id": "to29EmFoVReJ"
}
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer\n",
"\n",
"def compute_metrics(eval_pred):\n",
"    \"\"\"Return the weighted F1 score for one evaluation pass.\n",
"\n",
"    `eval_pred` unpacks into (predictions, labels); the predicted class of each\n",
"    row is the argmax of `predictions` along axis 1.\n",
"    \"\"\"\n",
"    predictions, labels = eval_pred\n",
"    predictions = np.argmax(predictions, axis=1)\n",
"    # Score with the F1 metric loaded earlier via evaluate.load(\"f1\"); weighted averaging\n",
"    # matches the f1_score(..., average='weighted') call used for the earlier models.\n",
"    return f1_metric.compute(predictions=predictions, references=labels, average=\"weighted\")\n",
"\n",
"# Class-index <-> class-name mappings stored in the model config.\n",
"id2label = {0: \"COMPANY\", 1: \"CUSTOMER\"}\n",
"label2id = {\"COMPANY\": 0, \"CUSTOMER\": 1}\n",
"\n",
"model = AutoModelForSequenceClassification.from_pretrained(\n",
"    \"distilbert-base-uncased\", num_labels=2, id2label=id2label, label2id=label2id\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 190,
"referenced_widgets": [
"95b0db5c543f43eebf63635730b7037d",
"41118f6465ce48c39dec15b4d1103952",
"43e310d1238a4b409748ec997e8b0862",
"99cbf6b355344cbdb355aef2885f9031",
"402feee4662a433bbcf178a79302a770",
"4f7125ae83804de0a29f4bc79ed64684",
"b96c6005b2d145639ee11f7d884fa60f",
"cb5d668913694d928fa866c141574f45",
"5e4c10ef1d7c489e920cee620fbd3f98",
"6e65358a194b4688810ff2811401500d",
"27dae2fa50ef45678e14b873371e80ae",
"d068bd0702fe4088b8f39a5c950d6e37",
"b526822b801249c4ac5f4c39a8cbb926",
"a89e11a023c7463794f82cf2ac63df37",
"eb1edbcadfd649a0ba0532c85079173a",
"1a3c5460e6de4d8c897e51dea64851da",
"544e750a299545f08b9c5938228d9c12",
"e1f7c2e195324494bee2e45e1930e4f7",
"97a00339991e4f27bae09592918eccd4",
"0cd8237e1fb74ea4ad2c7c5973c9081b",
"b24abd4b909f44eda2347a43b2e8c719",
"6f12fee2b06d41c3855fd50df4c53841"
]
},
"id": "IwUC7vUmOQUi",
"outputId": "6f25bc85-222f-47a2-a76e-1843dbacba84"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)lve/main/config.json: 0%| | 0.00/483 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "95b0db5c543f43eebf63635730b7037d"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)\"pytorch_model.bin\";: 0%| | 0.00/268M [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "d068bd0702fe4088b8f39a5c950d6e37"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.weight']\n",
"- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'classifier.bias', 'classifier.weight', 'pre_classifier.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Let's train it!"
],
"metadata": {
"id": "wdpJIpAjfwXZ"
}
},
{
"cell_type": "code",
"source": [
"# Fine-tuning configuration: two epochs, evaluating and checkpointing once per epoch.\n",
"training_args = TrainingArguments(\n",
"    output_dir=\"inScribe_model\",\n",
"    num_train_epochs=2,\n",
"    learning_rate=2e-5,\n",
"    weight_decay=0.01,\n",
"    per_device_train_batch_size=16,\n",
"    per_device_eval_batch_size=16,\n",
"    evaluation_strategy=\"epoch\",\n",
"    save_strategy=\"epoch\",\n",
"    load_best_model_at_end=True,\n",
")\n",
"\n",
"# The \"test\" split of the tokenized dataset is used as the evaluation set.\n",
"trainer = Trainer(\n",
"    model=model,\n",
"    args=training_args,\n",
"    tokenizer=tokenizer,\n",
"    data_collator=data_collator,\n",
"    compute_metrics=compute_metrics,\n",
"    train_dataset=tokenized_dataset[\"train\"],\n",
"    eval_dataset=tokenized_dataset[\"test\"],\n",
")\n",
"\n",
"trainer.train()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 740
},
"id": "2kf3KfiRPFZG",
"outputId": "0d7f0e41-62e4-4415-91da-e5e2cd1efc58"
},
"execution_count": null,
"outputs": [
{
"metadata": {
"tags": null
},
"name": "stderr",
"output_type": "stream",
"text": [
"PyTorch: setting up devices\n",
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n",
"The following columns in the training set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: token_type_ids, text. If token_type_ids, text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"***** Running training *****\n",
" Num examples = 201332\n",
" Num Epochs = 2\n",
" Instantaneous batch size per device = 16\n",
" Total train batch size (w. parallel, distributed & accumulation) = 16\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 25168\n",
" Number of trainable parameters = 66955010\n",
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='5825' max='25168' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [ 5825/25168 2:07:23 < 7:03:11, 0.76 it/s, Epoch 0.46/2]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Epoch</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table><p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='6125' max='25168' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [ 6125/25168 2:13:54 < 6:56:28, 0.76 it/s, Epoch 0.49/2]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Epoch</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table><p>"
]
},
"metadata": {}
},
{
"output_type": "error",
"ename": "KeyboardInterrupt",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-66-b9b00cf08581>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 21\u001b[0m )\n\u001b[1;32m 22\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1541\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_inner_training_loop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_train_batch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_find_batch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1542\u001b[0m )\n\u001b[0;32m-> 1543\u001b[0;31m return inner_training_loop(\n\u001b[0m\u001b[1;32m 1544\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1545\u001b[0m \u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1856\u001b[0m \u001b[0moptimizer_was_run\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscale_before\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0mscale_after\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1857\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1858\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1859\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1860\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0moptimizer_was_run\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdeepspeed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/torch/optim/lr_scheduler.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0minstance\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_step_count\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0mwrapped\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minstance\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;31m# Note that the returned function here is no longer a bound method,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/torch/optim/optimizer.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0mprofile_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Optimizer.step#{}.step\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprofiler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecord_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprofile_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 140\u001b[0;31m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 141\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_optimizer_step_code\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 142\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/transformers/optimization.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m 358\u001b[0m \u001b[0;31m# Decay the first and second moment running average coefficient\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 359\u001b[0m \u001b[0;31m# In-place operations to update the averages at the same time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 360\u001b[0;31m \u001b[0mexp_avg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmul_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbeta1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1.0\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mbeta1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 361\u001b[0m \u001b[0mexp_avg_sq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmul_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbeta2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maddcmul_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1.0\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mbeta2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 362\u001b[0m \u001b[0mdenom\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mexp_avg_sq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqrt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"eps\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# 3. Future Steps & Considerations"
],
"metadata": {
"id": "PVJ2Td9Lf19q"
}
},
{
"cell_type": "markdown",
"source": [
"\n",
"1. Logistic Regression performs quite well for a start. We can definitely improve its performance by removing outliers. To find outliers, we need to consider that:\n",
"  * It can be a wrong data entry (e.g. a human typing error)\n",
"  * It can be an entry whose values are not relevant (e.g. a \"total\" entry that is calculated as the sum of the columns above it. Such data can be misleading at times, so it should be removed)\n",
"  * It can be an entry in which all or most fields are blank (e.g. a row in the data where all fields are blank. Such a row may not contribute anything to the analysis)\n",
"  * It can be an extreme value that falls far outside the range of the other data (e.g. a length that is too long to be a company or customer name)\n",
"\n",
"2. Random Forest is both slower and performs slightly worse. To be sure, we might need to test it again after removing the outliers.\n",
"\n",
"3. Even though the transformer model's training is not completed yet, I believe its performance will be better, because it is based on BERT, whose performance can be considered SoTA. However, it is a heavy model and not cheap to deploy, so we might simply pick Logistic Regression for its speed.\n",
"\n",
"4. Employing LightGBM and XGBoost might also be effective, since they are boosting algorithms and might increase performance.\n",
"\n",
"5. If there were more time, I would also try TF-IDF, because **CountVectorizer** simply counts the number of times a word appears in a document (using a bag-of-words approach), while the **TF-IDF** vectorizer takes into account not only how many times a word appears in a document but also how important that word is to the whole corpus.\n",
"\n",
"6. After trying Logistic Regression, Random Forest, and LightGBM, we can combine the models into an ensemble, for example by following this tutorial: https://www.geeksforgeeks.org/ensemble-methods-in-python/\n",
"\n",
"7. To deploy these models, I would use **FastAPI** and follow these guides. For Logistic Regression and Random Forest: https://towardsdatascience.com/colabcode-deploying-machine-learning-models-from-google-colab-54e0d37a7b09\n",
"For HuggingFace models: \n",
"https://colab.research.google.com/drive/1jrKblK4iISeilrCasc02G8JAd8Z3G45h?usp=sharing\n",
"\n",
"8. I tried not to exceed 4 hours. Training a transformer takes a lot of time; therefore, I am submitting this task without completing that training. \n",
"\n",
"\n"
],
"metadata": {
"id": "i3199G-Xf-43"
}
}
],
"metadata": {
"colab": {
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"9c4fb2d129444963aea420c854eef3fc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_3b5cacca64fd493585821976258161ea",
"IPY_MODEL_23189cd56a1241a0a380908881212b80",
"IPY_MODEL_5c195943784a4148879a2eba852de73e"
],
"layout": "IPY_MODEL_96c47c1a7a56495b8769b2abc91c7554"
}
},
"3b5cacca64fd493585821976258161ea": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c267747bc9eb4f479725973e6f5a45e3",
"placeholder": "​",
"style": "IPY_MODEL_506588deea78460ea457fcfe500e6573",
"value": "100%"
}
},
"23189cd56a1241a0a380908881212b80": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_840a8c9797104868a9de57f861911ebd",
"max": 202,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_1975d008d30a4ca3886ee5df896b7761",
"value": 202
}
},
"5c195943784a4148879a2eba852de73e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_80925e9024c0420094607c2c6acb6bdc",
"placeholder": "​",
"style": "IPY_MODEL_b080984034b946ed82c2a980142bf692",
"value": " 202/202 [00:08&lt;00:00, 20.86ba/s]"
}
},
"96c47c1a7a56495b8769b2abc91c7554": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c267747bc9eb4f479725973e6f5a45e3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"506588deea78460ea457fcfe500e6573": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"840a8c9797104868a9de57f861911ebd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1975d008d30a4ca3886ee5df896b7761": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"80925e9024c0420094607c2c6acb6bdc": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b080984034b946ed82c2a980142bf692": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"1f6f87f4e0ef4832860c44a70eb1e4b0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_c5a6012a4db54497aef8b5b504299e18",
"IPY_MODEL_e2433a191b9d439e97194e46c8ef911b",
"IPY_MODEL_5d6825da704e4a669ab5e65025aab34a"
],
"layout": "IPY_MODEL_f1e91c961e804fc4952c0e01821bcfc6"
}
},
"c5a6012a4db54497aef8b5b504299e18": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_5a7feaa348e948c9a49128440581575e",
"placeholder": "​",
"style": "IPY_MODEL_516791f2b3c44db289647cf13406ab3a",
"value": "100%"
}
},
"e2433a191b9d439e97194e46c8ef911b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_fd29542280ee4e93b98cc6435bac95cc",
"max": 51,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_a6318f96b86e4f78b7b83dbdb06650e3",
"value": 51
}
},
"5d6825da704e4a669ab5e65025aab34a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3b94b9df7c9d4e2e84bafab06d4c996e",
"placeholder": "​",
"style": "IPY_MODEL_9b41efcc8b6e4fb598528a9471c2ce09",
"value": " 51/51 [00:01&lt;00:00, 29.13ba/s]"
}
},
"f1e91c961e804fc4952c0e01821bcfc6": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5a7feaa348e948c9a49128440581575e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"516791f2b3c44db289647cf13406ab3a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"fd29542280ee4e93b98cc6435bac95cc": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a6318f96b86e4f78b7b83dbdb06650e3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"3b94b9df7c9d4e2e84bafab06d4c996e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9b41efcc8b6e4fb598528a9471c2ce09": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"2927debb8f1a4f8896e8e3e64672aa3b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_ee01f994e4884ee289af2076c2418f9c",
"IPY_MODEL_25c4ae34365547afbf0757d7a1983eb9",
"IPY_MODEL_3272acbdf75f4dc5a54058638cd5b3ca"
],
"layout": "IPY_MODEL_6884a271eac1481ca360bc792d61ae48"
}
},
"ee01f994e4884ee289af2076c2418f9c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_000629234cfe477b971b14ca9cec62bb",
"placeholder": "​",
"style": "IPY_MODEL_88cc1d67e50d4539b50334f8661e1910",
"value": "Downloading builder script: 100%"
}
},
"25c4ae34365547afbf0757d7a1983eb9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_962f9d867fd844c8b4810bf394821f96",
"max": 6771,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_aeb7ffe019804fb28c64cd406dc1f118",
"value": 6771
}
},
"3272acbdf75f4dc5a54058638cd5b3ca": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c3dbae8b12c64860a2fb9332dd105144",
"placeholder": "​",
"style": "IPY_MODEL_661a6a170eb64681816b2ba3d0d664b2",
"value": " 6.77k/6.77k [00:00&lt;00:00, 235kB/s]"
}
},
"6884a271eac1481ca360bc792d61ae48": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"000629234cfe477b971b14ca9cec62bb": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"88cc1d67e50d4539b50334f8661e1910": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"962f9d867fd844c8b4810bf394821f96": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"aeb7ffe019804fb28c64cd406dc1f118": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"c3dbae8b12c64860a2fb9332dd105144": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"661a6a170eb64681816b2ba3d0d664b2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"95b0db5c543f43eebf63635730b7037d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_41118f6465ce48c39dec15b4d1103952",
"IPY_MODEL_43e310d1238a4b409748ec997e8b0862",
"IPY_MODEL_99cbf6b355344cbdb355aef2885f9031"
],
"layout": "IPY_MODEL_402feee4662a433bbcf178a79302a770"
}
},
"41118f6465ce48c39dec15b4d1103952": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_4f7125ae83804de0a29f4bc79ed64684",
"placeholder": "​",
"style": "IPY_MODEL_b96c6005b2d145639ee11f7d884fa60f",
"value": "Downloading (…)lve/main/config.json: 100%"
}
},
"43e310d1238a4b409748ec997e8b0862": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cb5d668913694d928fa866c141574f45",
"max": 483,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_5e4c10ef1d7c489e920cee620fbd3f98",
"value": 483
}
},
"99cbf6b355344cbdb355aef2885f9031": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_6e65358a194b4688810ff2811401500d",
"placeholder": "​",
"style": "IPY_MODEL_27dae2fa50ef45678e14b873371e80ae",
"value": " 483/483 [00:00&lt;00:00, 14.0kB/s]"
}
},
"402feee4662a433bbcf178a79302a770": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"4f7125ae83804de0a29f4bc79ed64684": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b96c6005b2d145639ee11f7d884fa60f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cb5d668913694d928fa866c141574f45": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5e4c10ef1d7c489e920cee620fbd3f98": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"6e65358a194b4688810ff2811401500d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"27dae2fa50ef45678e14b873371e80ae": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"d068bd0702fe4088b8f39a5c950d6e37": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_b526822b801249c4ac5f4c39a8cbb926",
"IPY_MODEL_a89e11a023c7463794f82cf2ac63df37",
"IPY_MODEL_eb1edbcadfd649a0ba0532c85079173a"
],
"layout": "IPY_MODEL_1a3c5460e6de4d8c897e51dea64851da"
}
},
"b526822b801249c4ac5f4c39a8cbb926": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_544e750a299545f08b9c5938228d9c12",
"placeholder": "​",
"style": "IPY_MODEL_e1f7c2e195324494bee2e45e1930e4f7",
"value": "Downloading (…)&quot;pytorch_model.bin&quot;;: 100%"
}
},
"a89e11a023c7463794f82cf2ac63df37": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_97a00339991e4f27bae09592918eccd4",
"max": 267967963,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_0cd8237e1fb74ea4ad2c7c5973c9081b",
"value": 267967963
}
},
"eb1edbcadfd649a0ba0532c85079173a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b24abd4b909f44eda2347a43b2e8c719",
"placeholder": "​",
"style": "IPY_MODEL_6f12fee2b06d41c3855fd50df4c53841",
"value": " 268M/268M [00:02&lt;00:00, 106MB/s]"
}
},
"1a3c5460e6de4d8c897e51dea64851da": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"544e750a299545f08b9c5938228d9c12": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e1f7c2e195324494bee2e45e1930e4f7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"97a00339991e4f27bae09592918eccd4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0cd8237e1fb74ea4ad2c7c5973c9081b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"b24abd4b909f44eda2347a43b2e8c719": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6f12fee2b06d41c3855fd50df4c53841": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment