Skip to content

Instantly share code, notes, and snippets.

@MaheKarim
Last active June 2, 2022 12:02
Show Gist options
  • Save MaheKarim/4c8186afddafe798f8f39d13ed2a574a to your computer and use it in GitHub Desktop.
Save MaheKarim/4c8186afddafe798f8f39d13ed2a574a to your computer and use it in GitHub Desktop.
Label Encoding Class.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Copy of Untitled2.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyNzRQ8XfZJh1H7w3fJzjkXo",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/MaheKarim/4c8186afddafe798f8f39d13ed2a574a/copy-of-untitled2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "AVr9fHqpcuaA"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"source": [
"# Path is relative to the working directory instead of the absolute\n",
"# /content/... location. Colab's default working directory is /content (where\n",
"# uploaded files land), so this resolves to the same file there and also\n",
"# works when the data file sits next to the notebook elsewhere.\n",
"DATA_PATH = \"lab_label.txt\"\n",
"df = pd.read_csv(DATA_PATH)\n",
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 407
},
"id": "YfDnvGlrdLjI",
"outputId": "a1101f36-9278-4ea1-d58b-d0385b1e3ea5"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
" <div id=\"df-6e879e0b-0ee9-437a-aa75-511470a52b2e\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>District</th>\n",
" <th>Size</th>\n",
" <th>Population</th>\n",
" <th>Speciality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Dhaka</td>\n",
" <td>1432</td>\n",
" <td>2250000</td>\n",
" <td>Administrative</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gazipur</td>\n",
" <td>879</td>\n",
" <td>567984</td>\n",
" <td>Industry</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Narayanganj</td>\n",
" <td>576</td>\n",
" <td>53426</td>\n",
" <td>Industry</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Rajbari</td>\n",
" <td>897</td>\n",
" <td>65899</td>\n",
" <td>Agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Chittagong</td>\n",
" <td>1234</td>\n",
" <td>1345566</td>\n",
" <td>Industry</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Cox's Bazer</td>\n",
" <td>456</td>\n",
" <td>46567</td>\n",
" <td>Tourist</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Bandarban</td>\n",
" <td>345</td>\n",
" <td>67579</td>\n",
" <td>Tourist</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Khustia</td>\n",
" <td>432</td>\n",
" <td>57798</td>\n",
" <td>Business</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Feni</td>\n",
" <td>543</td>\n",
" <td>67890</td>\n",
" <td>Agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Comilla</td>\n",
" <td>564</td>\n",
" <td>77898</td>\n",
" <td>Tourist</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Barisal</td>\n",
" <td>577</td>\n",
" <td>89750</td>\n",
" <td>Agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Faridpur</td>\n",
" <td>567</td>\n",
" <td>77650</td>\n",
" <td>Agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Rangpur</td>\n",
" <td>575</td>\n",
" <td>78966</td>\n",
" <td>Agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Chadpur</td>\n",
" <td>876</td>\n",
" <td>67789</td>\n",
" <td>Tourist</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-6e879e0b-0ee9-437a-aa75-511470a52b2e')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-6e879e0b-0ee9-437a-aa75-511470a52b2e button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-6e879e0b-0ee9-437a-aa75-511470a52b2e');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
],
"text/plain": [
" District Size Population Speciality\n",
"0 Dhaka 1432 2250000 Administrative\n",
"1 Gazipur 879 567984 Industry\n",
"2 Narayanganj 576 53426 Industry\n",
"3 Rajbari 897 65899 Agriculture\n",
"4 Chittagong 1234 1345566 Industry\n",
"5 Cox's Bazer 456 46567 Tourist\n",
"6 Bandarban 345 67579 Tourist\n",
"7 Khustia 432 57798 Business\n",
"8 Feni 543 67890 Agriculture\n",
"9 Comilla 564 77898 Tourist\n",
"10 Barisal 577 89750 Agriculture\n",
"11 Faridpur 567 77650 Agriculture\n",
"12 Rangpur 575 78966 Agriculture\n",
"13 Chadpur 876 67789 Tourist"
]
},
"metadata": {},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"source": [
"df.dtypes"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Y_bJx20idb-E",
"outputId": "e81d9f76-13fd-4da3-ff61-5da6b3a633bf"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"District object\n",
"Size int64\n",
"Population int64\n",
"Speciality object\n",
"dtype: object"
]
},
"metadata": {},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"source": [
"from sklearn.preprocessing import LabelEncoder"
],
"metadata": {
"id": "6Uygom6mdgki"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Integer-encode the District names. The codes follow the sorted order of the\n",
"# names (Bandarban -> 0 ... Rangpur -> 13 in the saved output).\n",
"# NOTE(review): the saved output of the next cell also shows a 'New District'\n",
"# column with identical codes, but no cell in this notebook creates it --\n",
"# presumably left over from a deleted cell; re-run from a fresh kernel.\n",
"lb = LabelEncoder()\n",
"lb.fit(df[\"District\"])\n",
"df[\"Filterize\"] = lb.transform(df[\"District\"])"
],
"metadata": {
"id": "h0m0zwprdub0"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 407
},
"id": "4M2l-Otzdxsh",
"outputId": "74d11b6b-5141-4d02-9690-d6d66e46e906"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
" <div id=\"df-4e6c1b7b-c12b-493b-af24-325296592427\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>District</th>\n",
" <th>Size</th>\n",
" <th>Population</th>\n",
" <th>Speciality</th>\n",
" <th>New District</th>\n",
" <th>Filterize</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Dhaka</td>\n",
" <td>1432</td>\n",
" <td>2250000</td>\n",
" <td>Administrative</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gazipur</td>\n",
" <td>879</td>\n",
" <td>567984</td>\n",
" <td>Industry</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Narayanganj</td>\n",
" <td>576</td>\n",
" <td>53426</td>\n",
" <td>Industry</td>\n",
" <td>11</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Rajbari</td>\n",
" <td>897</td>\n",
" <td>65899</td>\n",
" <td>Agriculture</td>\n",
" <td>12</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Chittagong</td>\n",
" <td>1234</td>\n",
" <td>1345566</td>\n",
" <td>Industry</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Cox's Bazer</td>\n",
" <td>456</td>\n",
" <td>46567</td>\n",
" <td>Tourist</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Bandarban</td>\n",
" <td>345</td>\n",
" <td>67579</td>\n",
" <td>Tourist</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Khustia</td>\n",
" <td>432</td>\n",
" <td>57798</td>\n",
" <td>Business</td>\n",
" <td>10</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Feni</td>\n",
" <td>543</td>\n",
" <td>67890</td>\n",
" <td>Agriculture</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Comilla</td>\n",
" <td>564</td>\n",
" <td>77898</td>\n",
" <td>Tourist</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Barisal</td>\n",
" <td>577</td>\n",
" <td>89750</td>\n",
" <td>Agriculture</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Faridpur</td>\n",
" <td>567</td>\n",
" <td>77650</td>\n",
" <td>Agriculture</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Rangpur</td>\n",
" <td>575</td>\n",
" <td>78966</td>\n",
" <td>Agriculture</td>\n",
" <td>13</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Chadpur</td>\n",
" <td>876</td>\n",
" <td>67789</td>\n",
" <td>Tourist</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-4e6c1b7b-c12b-493b-af24-325296592427')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-4e6c1b7b-c12b-493b-af24-325296592427 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-4e6c1b7b-c12b-493b-af24-325296592427');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
],
"text/plain": [
" District Size Population Speciality New District Filterize\n",
"0 Dhaka 1432 2250000 Administrative 6 6\n",
"1 Gazipur 879 567984 Industry 9 9\n",
"2 Narayanganj 576 53426 Industry 11 11\n",
"3 Rajbari 897 65899 Agriculture 12 12\n",
"4 Chittagong 1234 1345566 Industry 3 3\n",
"5 Cox's Bazer 456 46567 Tourist 5 5\n",
"6 Bandarban 345 67579 Tourist 0 0\n",
"7 Khustia 432 57798 Business 10 10\n",
"8 Feni 543 67890 Agriculture 8 8\n",
"9 Comilla 564 77898 Tourist 4 4\n",
"10 Barisal 577 89750 Agriculture 1 1\n",
"11 Faridpur 567 77650 Agriculture 7 7\n",
"12 Rangpur 575 78966 Agriculture 13 13\n",
"13 Chadpur 876 67789 Tourist 2 2"
]
},
"metadata": {},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"source": [
"# Use a dedicated encoder for this column. Calling fit_transform on `lb` again\n",
"# would overwrite the District classes it learned earlier, so the District\n",
"# codes could no longer be mapped back with lb.inverse_transform.\n",
"speciality_encoder = LabelEncoder()\n",
"df[\"Special Encoding\"] = speciality_encoder.fit_transform(df[\"Speciality\"])\n",
"df"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 407
},
"id": "9byhtu5Jehyc",
"outputId": "06d825d7-d32c-41bc-dcbe-9b3b566c95bf"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
" <div id=\"df-74745199-e23c-41f9-8e16-f9bbcf43d553\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>District</th>\n",
" <th>Size</th>\n",
" <th>Population</th>\n",
" <th>Speciality</th>\n",
" <th>New District</th>\n",
" <th>Filterize</th>\n",
" <th>Special Encoding</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Dhaka</td>\n",
" <td>1432</td>\n",
" <td>2250000</td>\n",
" <td>Administrative</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gazipur</td>\n",
" <td>879</td>\n",
" <td>567984</td>\n",
" <td>Industry</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Narayanganj</td>\n",
" <td>576</td>\n",
" <td>53426</td>\n",
" <td>Industry</td>\n",
" <td>11</td>\n",
" <td>11</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Rajbari</td>\n",
" <td>897</td>\n",
" <td>65899</td>\n",
" <td>Agriculture</td>\n",
" <td>12</td>\n",
" <td>12</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Chittagong</td>\n",
" <td>1234</td>\n",
" <td>1345566</td>\n",
" <td>Industry</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Cox's Bazer</td>\n",
" <td>456</td>\n",
" <td>46567</td>\n",
" <td>Tourist</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Bandarban</td>\n",
" <td>345</td>\n",
" <td>67579</td>\n",
" <td>Tourist</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Khustia</td>\n",
" <td>432</td>\n",
" <td>57798</td>\n",
" <td>Business</td>\n",
" <td>10</td>\n",
" <td>10</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Feni</td>\n",
" <td>543</td>\n",
" <td>67890</td>\n",
" <td>Agriculture</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Comilla</td>\n",
" <td>564</td>\n",
" <td>77898</td>\n",
" <td>Tourist</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Barisal</td>\n",
" <td>577</td>\n",
" <td>89750</td>\n",
" <td>Agriculture</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Faridpur</td>\n",
" <td>567</td>\n",
" <td>77650</td>\n",
" <td>Agriculture</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Rangpur</td>\n",
" <td>575</td>\n",
" <td>78966</td>\n",
" <td>Agriculture</td>\n",
" <td>13</td>\n",
" <td>13</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Chadpur</td>\n",
" <td>876</td>\n",
" <td>67789</td>\n",
" <td>Tourist</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-74745199-e23c-41f9-8e16-f9bbcf43d553')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-74745199-e23c-41f9-8e16-f9bbcf43d553 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-74745199-e23c-41f9-8e16-f9bbcf43d553');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
],
"text/plain": [
" District Size Population ... New District Filterize Special Encoding\n",
"0 Dhaka 1432 2250000 ... 6 6 0\n",
"1 Gazipur 879 567984 ... 9 9 3\n",
"2 Narayanganj 576 53426 ... 11 11 3\n",
"3 Rajbari 897 65899 ... 12 12 1\n",
"4 Chittagong 1234 1345566 ... 3 3 3\n",
"5 Cox's Bazer 456 46567 ... 5 5 4\n",
"6 Bandarban 345 67579 ... 0 0 4\n",
"7 Khustia 432 57798 ... 10 10 2\n",
"8 Feni 543 67890 ... 8 8 1\n",
"9 Comilla 564 77898 ... 4 4 4\n",
"10 Barisal 577 89750 ... 1 1 1\n",
"11 Faridpur 567 77650 ... 7 7 1\n",
"12 Rangpur 575 78966 ... 13 13 1\n",
"13 Chadpur 876 67789 ... 2 2 4\n",
"\n",
"[14 rows x 7 columns]"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
"from sklearn.preprocessing import OneHotEncoder"
],
"metadata": {
"id": "Jc03ozi-iDh5"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and the old\n",
"# keyword was removed in 1.4, so `sparse=False` raises TypeError on current\n",
"# releases. This cell therefore requires scikit-learn >= 1.2.\n",
"# drop='first' omits the first category's column, so that category is\n",
"# encoded as an all-zero row.\n",
"oh = OneHotEncoder(sparse_output=False,\n",
"                   handle_unknown='error',\n",
"                   drop='first')"
],
"metadata": {
"id": "oyJoI2XMiP0_"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Keep the fitted encoder in `oh` and store the encoded matrix under its own\n",
"# name. Rebinding `oh` to the array made this cell fail on a second run\n",
"# (an ndarray has no fit_transform) and discarded the fitted encoder.\n",
"district_onehot = oh.fit_transform(df[['District']])"
],
"metadata": {
"id": "0ipbSTJ-iz8V"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"district_onehot"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "cRXEGagzi4GI",
"outputId": "f949bded-82b5-4f4e-d0c1-491061dc5e2e"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],\n",
" [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n",
" [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],\n",
" [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])"
]
},
"metadata": {},
"execution_count": 21
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment