Skip to content

Instantly share code, notes, and snippets.

@prateekiiest
Created July 28, 2020 03:23
Show Gist options
  • Save prateekiiest/601341c0d9ad3add8f5dceb9bda770a6 to your computer and use it in GitHub Desktop.
Save prateekiiest/601341c0d9ad3add8f5dceb9bda770a6 to your computer and use it in GitHub Desktop.
vaccine prediction on dummy data
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Load the dataset from data_new.csv (relative path, resolved against the working directory).\n",
"df = pd.read_csv(filepath_or_buffer=\"data_new.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>UUID</th>\n",
" <th>Gender</th>\n",
" <th>Age</th>\n",
" <th>Profession</th>\n",
" <th>Locality/Pin Code</th>\n",
" <th>Rate of infection in that zone</th>\n",
" <th>Pre-existing medical conditions</th>\n",
" <th>Travel history in the last 1 month</th>\n",
" <th>Coming in contact with someone who has been diagnosed with Covid 19</th>\n",
" <th>Do you have any kind of Covid 19 symptoms</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3445-8574-6625-7046</td>\n",
" <td>F</td>\n",
" <td>59</td>\n",
" <td>Blue Collar worker</td>\n",
" <td>586102</td>\n",
" <td>0.9</td>\n",
" <td>None</td>\n",
" <td>N</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4850-7196-6694-3565</td>\n",
" <td>M</td>\n",
" <td>1</td>\n",
" <td>Public Policy</td>\n",
" <td>585275</td>\n",
" <td>0.9</td>\n",
" <td>None</td>\n",
" <td>N</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1735-1721-7830-2262</td>\n",
" <td>M</td>\n",
" <td>38</td>\n",
" <td>Blue Collar worker</td>\n",
" <td>570020</td>\n",
" <td>0.8</td>\n",
" <td>Pulmonary</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5589-4350-6261-4583</td>\n",
" <td>F</td>\n",
" <td>29</td>\n",
" <td>Bank</td>\n",
" <td>563114</td>\n",
" <td>1.1</td>\n",
" <td>Diabetes</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8748-8954-4871-2639</td>\n",
" <td>F</td>\n",
" <td>32</td>\n",
" <td>Work from Home</td>\n",
" <td>581319</td>\n",
" <td>1.0</td>\n",
" <td>None</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" UUID Gender Age Profession Locality/Pin Code \\\n",
"0 3445-8574-6625-7046 F 59 Blue Collar worker 586102 \n",
"1 4850-7196-6694-3565 M 1 Public Policy 585275 \n",
"2 1735-1721-7830-2262 M 38 Blue Collar worker 570020 \n",
"3 5589-4350-6261-4583 F 29 Bank 563114 \n",
"4 8748-8954-4871-2639 F 32 Work from Home 581319 \n",
"\n",
" Rate of infection in that zone Pre-existing medical conditions \\\n",
"0 0.9 None \n",
"1 0.9 None \n",
"2 0.8 Pulmonary \n",
"3 1.1 Diabetes \n",
"4 1.0 None \n",
"\n",
" Travel history in the last 1 month \\\n",
"0 N \n",
"1 N \n",
"2 Y \n",
"3 N \n",
"4 N \n",
"\n",
" Coming in contact with someone who has been diagnosed with Covid 19 \\\n",
"0 N \n",
"1 N \n",
"2 N \n",
"3 Y \n",
"4 Y \n",
"\n",
" Do you have any kind of Covid 19 symptoms \n",
"0 Y \n",
"1 Y \n",
"2 Y \n",
"3 Y \n",
"4 N "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows (head() defaults to n=5).\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import KMeans"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>UUID</th>\n",
" <th>Gender</th>\n",
" <th>Age</th>\n",
" <th>Profession</th>\n",
" <th>Locality/Pin Code</th>\n",
" <th>Rate of infection in that zone</th>\n",
" <th>Pre-existing medical conditions</th>\n",
" <th>Travel history in the last 1 month</th>\n",
" <th>Coming in contact with someone who has been diagnosed with Covid 19</th>\n",
" <th>Do you have any kind of Covid 19 symptoms</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1735-1721-7830-2262</td>\n",
" <td>M</td>\n",
" <td>38</td>\n",
" <td>Blue Collar worker</td>\n",
" <td>570020</td>\n",
" <td>0.8</td>\n",
" <td>Pulmonary</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5589-4350-6261-4583</td>\n",
" <td>F</td>\n",
" <td>29</td>\n",
" <td>Bank</td>\n",
" <td>563114</td>\n",
" <td>1.1</td>\n",
" <td>Diabetes</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8748-8954-4871-2639</td>\n",
" <td>F</td>\n",
" <td>32</td>\n",
" <td>Work from Home</td>\n",
" <td>581319</td>\n",
" <td>1.0</td>\n",
" <td>None</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>9059-2726-5895-7502</td>\n",
" <td>M</td>\n",
" <td>30</td>\n",
" <td>Blue Collar worker</td>\n",
" <td>560051</td>\n",
" <td>0.8</td>\n",
" <td>Kidney Ailments</td>\n",
" <td>Y</td>\n",
" <td>Y</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7065-8302-9904-5184</td>\n",
" <td>F</td>\n",
" <td>69</td>\n",
" <td>Blue Collar worker</td>\n",
" <td>574286</td>\n",
" <td>1.0</td>\n",
" <td>Heart</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>995</th>\n",
" <td>9981-8093-6006-7258</td>\n",
" <td>M</td>\n",
" <td>42</td>\n",
" <td>Bank</td>\n",
" <td>560009</td>\n",
" <td>1.0</td>\n",
" <td>Diabetes</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" </tr>\n",
" <tr>\n",
" <th>996</th>\n",
" <td>5077-3958-1800-5552</td>\n",
" <td>F</td>\n",
" <td>26</td>\n",
" <td>Blue Collar worker</td>\n",
" <td>591152</td>\n",
" <td>1.1</td>\n",
" <td>None</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>997</th>\n",
" <td>6521-4528-5718-6437</td>\n",
" <td>M</td>\n",
" <td>70</td>\n",
" <td>Public Policy</td>\n",
" <td>586101</td>\n",
" <td>0.8</td>\n",
" <td>Pulmonary</td>\n",
" <td>Y</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" </tr>\n",
" <tr>\n",
" <th>998</th>\n",
" <td>4937-5598-1107-1287</td>\n",
" <td>F</td>\n",
" <td>30</td>\n",
" <td>Health Care</td>\n",
" <td>577116</td>\n",
" <td>0.8</td>\n",
" <td>Heart</td>\n",
" <td>N</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999</th>\n",
" <td>5305-3729-1958-3444</td>\n",
" <td>F</td>\n",
" <td>51</td>\n",
" <td>Blue Collar worker</td>\n",
" <td>577232</td>\n",
" <td>1.1</td>\n",
" <td>None</td>\n",
" <td>Y</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>998 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" UUID Gender Age Profession Locality/Pin Code \\\n",
"2 1735-1721-7830-2262 M 38 Blue Collar worker 570020 \n",
"3 5589-4350-6261-4583 F 29 Bank 563114 \n",
"4 8748-8954-4871-2639 F 32 Work from Home 581319 \n",
"5 9059-2726-5895-7502 M 30 Blue Collar worker 560051 \n",
"6 7065-8302-9904-5184 F 69 Blue Collar worker 574286 \n",
".. ... ... ... ... ... \n",
"995 9981-8093-6006-7258 M 42 Bank 560009 \n",
"996 5077-3958-1800-5552 F 26 Blue Collar worker 591152 \n",
"997 6521-4528-5718-6437 M 70 Public Policy 586101 \n",
"998 4937-5598-1107-1287 F 30 Health Care 577116 \n",
"999 5305-3729-1958-3444 F 51 Blue Collar worker 577232 \n",
"\n",
" Rate of infection in that zone Pre-existing medical conditions \\\n",
"2 0.8 Pulmonary \n",
"3 1.1 Diabetes \n",
"4 1.0 None \n",
"5 0.8 Kidney Ailments \n",
"6 1.0 Heart \n",
".. ... ... \n",
"995 1.0 Diabetes \n",
"996 1.1 None \n",
"997 0.8 Pulmonary \n",
"998 0.8 Heart \n",
"999 1.1 None \n",
"\n",
" Travel history in the last 1 month \\\n",
"2 Y \n",
"3 N \n",
"4 N \n",
"5 Y \n",
"6 N \n",
".. ... \n",
"995 N \n",
"996 Y \n",
"997 Y \n",
"998 N \n",
"999 Y \n",
"\n",
" Coming in contact with someone who has been diagnosed with Covid 19 \\\n",
"2 N \n",
"3 Y \n",
"4 Y \n",
"5 Y \n",
"6 Y \n",
".. ... \n",
"995 Y \n",
"996 N \n",
"997 Y \n",
"998 N \n",
"999 Y \n",
"\n",
" Do you have any kind of Covid 19 symptoms \n",
"2 Y \n",
"3 Y \n",
"4 N \n",
"5 Y \n",
"6 N \n",
".. ... \n",
"995 N \n",
"996 Y \n",
"997 N \n",
"998 Y \n",
"999 N \n",
"\n",
"[998 rows x 10 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display the frame; pandas abbreviates long tables to their first and last rows.\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Raw cell values as a NumPy array; text columns are kept as strings (nothing is encoded here).\n",
"X = df.to_numpy(copy=False)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# KMeans only accepts numeric input, so one-hot encode the text columns first\n",
"# (fitting on the raw values raised \"could not convert string to float: 'N'\").\n",
"# UUID is left out because it looks like a per-row identifier rather than a feature.\n",
"# NOTE(review): the numeric columns are not scaled, so large-valued ones such as the\n",
"# pin code will dominate the distances -- consider standardising before relying on this.\n",
"X_numeric = pd.get_dummies(df.drop(columns=[\"UUID\"]), dtype=float).to_numpy(dtype=float)\n",
"kmeans = KMeans(n_clusters=2, random_state=0).fit(X_numeric)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import OneHotEncoder"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Toy rows plus an encoder configured (per scikit-learn docs) not to raise on unseen categories.\n",
"# NOTE(review): these rows use 'Male'/'Female'/'Other', whereas the Gender column in the\n",
"# data shows 'M'/'F' -- confirm this toy input is what was intended.\n",
"gend_enc = [[\"Male\", 1], [\"Other\", 3], [\"Female\", 2]]\n",
"enc = OneHotEncoder(handle_unknown=\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"OneHotEncoder(categories='auto', drop=None, dtype=<class 'numpy.float64'>,\n",
" handle_unknown='ignore', sparse=True)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit the encoder on the toy rows defined above.\n",
"enc.fit(X=gend_enc)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting kmodes\n",
" Downloading kmodes-0.10.2-py2.py3-none-any.whl (18 kB)\n",
"Requirement already satisfied: numpy>=1.10.4 in c:\\users\\prate\\anaconda3\\lib\\site-packages (from kmodes) (1.18.1)\n",
"Requirement already satisfied: scipy>=0.13.3 in c:\\users\\prate\\anaconda3\\lib\\site-packages (from kmodes) (1.4.1)\n",
"Requirement already satisfied: joblib>=0.11 in c:\\users\\prate\\anaconda3\\lib\\site-packages (from kmodes) (0.14.1)\n",
"Requirement already satisfied: scikit-learn>=0.19.0 in c:\\users\\prate\\anaconda3\\lib\\site-packages (from kmodes) (0.22.1)\n",
"Installing collected packages: kmodes\n",
"Successfully installed kmodes-0.10.2\n"
]
}
],
"source": [
"# Install into the running kernel's environment (%pip, not !pip) and pin the version\n",
"# so a later re-run gets the same library.\n",
"%pip install kmodes==0.10.2"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"from kmodes.kmodes import KModes\n"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 1, iteration: 1/100, moves: 71, cost: 6444.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 2, iteration: 1/100, moves: 222, cost: 6429.0\n",
"Run 2, iteration: 2/100, moves: 22, cost: 6429.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 3, iteration: 1/100, moves: 209, cost: 6445.0\n",
"Run 3, iteration: 2/100, moves: 60, cost: 6445.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 4, iteration: 1/100, moves: 220, cost: 6416.0\n",
"Run 4, iteration: 2/100, moves: 117, cost: 6416.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 5, iteration: 1/100, moves: 181, cost: 6411.0\n",
"Run 5, iteration: 2/100, moves: 3, cost: 6411.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 6, iteration: 1/100, moves: 245, cost: 6539.0\n",
"Run 6, iteration: 2/100, moves: 61, cost: 6510.0\n",
"Run 6, iteration: 3/100, moves: 1, cost: 6510.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 7, iteration: 1/100, moves: 197, cost: 6553.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 8, iteration: 1/100, moves: 247, cost: 6587.0\n",
"Run 8, iteration: 2/100, moves: 5, cost: 6587.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 9, iteration: 1/100, moves: 204, cost: 6442.0\n",
"Run 9, iteration: 2/100, moves: 24, cost: 6442.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 10, iteration: 1/100, moves: 180, cost: 6428.0\n",
"Run 10, iteration: 2/100, moves: 0, cost: 6428.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 11, iteration: 1/100, moves: 241, cost: 6540.0\n",
"Run 11, iteration: 2/100, moves: 45, cost: 6540.0\n",
"Best run was number 5\n"
]
}
],
"source": [
"# Cluster the rows of df into two groups with k-modes. 'Huang' initialisation is random,\n",
"# so random_state is fixed to make the labels reproducible across re-runs.\n",
"# NOTE(review): every column, including UUID and the numeric ones, is passed in as a\n",
"# feature -- confirm that is intended.\n",
"km = KModes(n_clusters=2, init='Huang', n_init=11, verbose=1, random_state=0)\n",
"clusters = km.fit_predict(df)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Cluster centroids found by the fitted model, plus the shape of that array.\n",
"kmodes = km.cluster_centroids_\n",
"shape = km.cluster_centroids_.shape"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([['1003-2909-7850-6803', 'M', '16', 'Bank', '586101', '0.9',\n",
" 'Cancer', 'Y', 'Y', 'Y'],\n",
" ['1025-4232-8458-4165', 'F', '18', 'Blue Collar worker', '583101',\n",
" '1.1', 'Kidney Ailments', 'N', 'N', 'N']], dtype='<U32')"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One centroid row per cluster, with one entry per input column.\n",
"kmodes"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# The centroid values as plain nested lists. ndarray.data is a buffer attribute, not a\n",
"# method, so calling kmodes.data() raises TypeError; tolist() is the callable equivalent.\n",
"kmodes.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0,\n",
" 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0,\n",
" 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0,\n",
" 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,\n",
" 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,\n",
" 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1,\n",
" 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,\n",
" 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0,\n",
" 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1,\n",
" 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1,\n",
" 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,\n",
" 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1,\n",
" 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,\n",
" 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,\n",
" 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1,\n",
" 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1,\n",
" 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n",
" 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,\n",
" 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,\n",
" 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1,\n",
" 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,\n",
" 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0,\n",
" 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n",
" 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1,\n",
" 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,\n",
" 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,\n",
" 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1,\n",
" 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,\n",
" 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,\n",
" 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,\n",
" 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0,\n",
" 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0,\n",
" 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1,\n",
" 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1,\n",
" 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,\n",
" 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1,\n",
" 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,\n",
" 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1,\n",
" 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1,\n",
" 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0,\n",
" 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1,\n",
" 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1,\n",
" 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,\n",
" 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,\n",
" 1, 0, 0, 0, 0, 0, 1, 0, 1, 1], dtype=uint16)"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Cluster label assigned to each row, in row order.\n",
"clusters"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns; sns.set() # for plot styling\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1000"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of labelled rows.\n",
"clusters.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 1, iteration: 1/100, moves: 0, cost: 7298.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 1, iteration: 1/100, moves: 16, cost: 6468.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 1, iteration: 1/100, moves: 10, cost: 6219.0\n",
"Init: initializing centroids\n",
"Init: initializing clusters\n",
"Starting iterations...\n",
"Run 1, iteration: 1/100, moves: 7, cost: 6001.0\n"
]
}
],
"source": [
"# Fit k-modes for k = 1..4 and collect each model's cost for the elbow plot.\n",
"cost = []\n",
"for num_clusters in range(1, 5):\n",
"    kmode = KModes(verbose=1, n_init=1, init=\"Cao\", n_clusters=num_clusters)\n",
"    kmode.fit_predict(df)\n",
"    cost += [kmode.cost_]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Elbow method to find the optimal number of clusters"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x21d89edf6c8>]"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Plot cost against the number of clusters; the bend (\"elbow\") suggests a reasonable k.\n",
"# The x values are derived from len(cost) so they always match the costs collected above.\n",
"k_values = np.arange(1, len(cost) + 1)\n",
"fig, ax = plt.subplots()\n",
"ax.plot(k_values, cost, marker=\"o\")\n",
"ax.set(xlabel=\"Number of clusters (k)\", ylabel=\"K-modes cost\", title=\"Elbow curve\")\n",
"ax.set_xticks(k_values)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"# Work on an independent copy so the loaded frame stays untouched.\n",
"df_copy = df.copy(deep=True)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# Renumber the rows from 0; the previous row labels are kept in a new 'index' column.\n",
"df_new = df_copy.reset_index(drop=False)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"# Attach each row's cluster label as a 'cluster_predicted' column.\n",
"# Fail loudly if the labels and the rows are out of sync instead of padding with NaN.\n",
"assert len(clusters) == len(df_new), \"cluster labels and rows differ in length; re-run the clustering cell\"\n",
"clustersDf = pd.DataFrame({\"cluster_predicted\": clusters})\n",
"# df_new carries the old row labels in an 'index' column (added by reset_index); drop it here.\n",
"combinedDf = pd.concat([df_new.drop(columns=\"index\"), clustersDf], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>UUID</th>\n",
" <th>Gender</th>\n",
" <th>Age</th>\n",
" <th>Profession</th>\n",
" <th>Locality/Pin Code</th>\n",
" <th>Rate of infection in that zone</th>\n",
" <th>Pre-existing medical conditions</th>\n",
" <th>Travel history in the last 1 month</th>\n",
" <th>Coming in contact with someone who has been diagnosed with Covid 19</th>\n",
" <th>Do you have any kind of Covid 19 symptoms</th>\n",
" <th>cluster_predicted</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3445-8574-6625-7046</td>\n",
" <td>F</td>\n",
" <td>59</td>\n",
" <td>Blue Collar worker</td>\n",
" <td>586102</td>\n",
" <td>0.9</td>\n",
" <td>None</td>\n",
" <td>N</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4850-7196-6694-3565</td>\n",
" <td>M</td>\n",
" <td>1</td>\n",
" <td>Public Policy</td>\n",
" <td>585275</td>\n",
" <td>0.9</td>\n",
" <td>None</td>\n",
" <td>N</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1735-1721-7830-2262</td>\n",
" <td>M</td>\n",
" <td>38</td>\n",
" <td>Blue Collar worker</td>\n",
" <td>570020</td>\n",
" <td>0.8</td>\n",
" <td>Pulmonary</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5589-4350-6261-4583</td>\n",
" <td>F</td>\n",
" <td>29</td>\n",
" <td>Bank</td>\n",
" <td>563114</td>\n",
" <td>1.1</td>\n",
" <td>Diabetes</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>Y</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8748-8954-4871-2639</td>\n",
" <td>F</td>\n",
" <td>32</td>\n",
" <td>Work from Home</td>\n",
" <td>581319</td>\n",
" <td>1.0</td>\n",
" <td>None</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>995</th>\n",
" <td>9981-8093-6006-7258</td>\n",
" <td>M</td>\n",
" <td>42</td>\n",
" <td>Bank</td>\n",
" <td>560009</td>\n",
" <td>1.0</td>\n",
" <td>Diabetes</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>996</th>\n",
" <td>5077-3958-1800-5552</td>\n",
" <td>F</td>\n",
" <td>26</td>\n",
" <td>Blue Collar worker</td>\n",
" <td>591152</td>\n",
" <td>1.1</td>\n",
" <td>None</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>997</th>\n",
" <td>6521-4528-5718-6437</td>\n",
" <td>M</td>\n",
" <td>70</td>\n",
" <td>Public Policy</td>\n",
" <td>586101</td>\n",
" <td>0.8</td>\n",
" <td>Pulmonary</td>\n",
" <td>Y</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>998</th>\n",
" <td>4937-5598-1107-1287</td>\n",
" <td>F</td>\n",
" <td>30</td>\n",
" <td>Health Care</td>\n",
" <td>577116</td>\n",
" <td>0.8</td>\n",
" <td>Heart</td>\n",
" <td>N</td>\n",
" <td>N</td>\n",
" <td>Y</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999</th>\n",
" <td>5305-3729-1958-3444</td>\n",
" <td>F</td>\n",
" <td>51</td>\n",
" <td>Blue Collar worker</td>\n",
" <td>577232</td>\n",
" <td>1.1</td>\n",
" <td>None</td>\n",
" <td>Y</td>\n",
" <td>Y</td>\n",
" <td>N</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" UUID Gender Age Profession Locality/Pin Code \\\n",
"0 3445-8574-6625-7046 F 59 Blue Collar worker 586102 \n",
"1 4850-7196-6694-3565 M 1 Public Policy 585275 \n",
"2 1735-1721-7830-2262 M 38 Blue Collar worker 570020 \n",
"3 5589-4350-6261-4583 F 29 Bank 563114 \n",
"4 8748-8954-4871-2639 F 32 Work from Home 581319 \n",
".. ... ... ... ... ... \n",
"995 9981-8093-6006-7258 M 42 Bank 560009 \n",
"996 5077-3958-1800-5552 F 26 Blue Collar worker 591152 \n",
"997 6521-4528-5718-6437 M 70 Public Policy 586101 \n",
"998 4937-5598-1107-1287 F 30 Health Care 577116 \n",
"999 5305-3729-1958-3444 F 51 Blue Collar worker 577232 \n",
"\n",
" Rate of infection in that zone Pre-existing medical conditions \\\n",
"0 0.9 None \n",
"1 0.9 None \n",
"2 0.8 Pulmonary \n",
"3 1.1 Diabetes \n",
"4 1.0 None \n",
".. ... ... \n",
"995 1.0 Diabetes \n",
"996 1.1 None \n",
"997 0.8 Pulmonary \n",
"998 0.8 Heart \n",
"999 1.1 None \n",
"\n",
" Travel history in the last 1 month \\\n",
"0 N \n",
"1 N \n",
"2 Y \n",
"3 N \n",
"4 N \n",
".. ... \n",
"995 N \n",
"996 Y \n",
"997 Y \n",
"998 N \n",
"999 Y \n",
"\n",
" Coming in contact with someone who has been diagnosed with Covid 19 \\\n",
"0 N \n",
"1 N \n",
"2 N \n",
"3 Y \n",
"4 Y \n",
".. ... \n",
"995 Y \n",
"996 N \n",
"997 Y \n",
"998 N \n",
"999 Y \n",
"\n",
" Do you have any kind of Covid 19 symptoms cluster_predicted \n",
"0 Y 1 \n",
"1 Y 0 \n",
"2 Y 0 \n",
"3 Y 0 \n",
"4 N 1 \n",
".. ... ... \n",
"995 N 0 \n",
"996 Y 1 \n",
"997 N 0 \n",
"998 Y 1 \n",
"999 N 1 \n",
"\n",
"[1000 rows x 11 columns]"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The original columns plus the cluster label of every row.\n",
"combinedDf"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"# Split the labelled rows into one frame per cluster.\n",
"cluster_0 = combinedDf.loc[combinedDf[\"cluster_predicted\"].eq(0)]\n",
"cluster_1 = combinedDf.loc[combinedDf[\"cluster_predicted\"].eq(1)]"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 582 entries, 1 to 997\n",
"Data columns (total 11 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 UUID 582 non-null object \n",
" 1 Gender 582 non-null object \n",
" 2 Age 582 non-null int64 \n",
" 3 Profession 582 non-null object \n",
" 4 Locality/Pin Code 582 non-null int64 \n",
" 5 Rate of infection in that zone 582 non-null float64\n",
" 6 Pre-existing medical conditions 582 non-null object \n",
" 7 Travel history in the last 1 month 582 non-null object \n",
" 8 Coming in contact with someone who has been diagnosed with Covid 19 582 non-null object \n",
" 9 Do you have any kind of Covid 19 symptoms 582 non-null object \n",
" 10 cluster_predicted 582 non-null uint16 \n",
"dtypes: float64(1), int64(2), object(7), uint16(1)\n",
"memory usage: 51.2+ KB\n"
]
}
],
"source": [
"# Column dtypes and non-null counts for the rows assigned to cluster 0.\n",
"cluster_0.info()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1080x360 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Age distribution per cluster; bars follow the order of combinedDf['Age'].value_counts().\n",
"fig, ax = plt.subplots(figsize=(15, 5))\n",
"sns.countplot(\n",
"    data=combinedDf,\n",
"    x=\"Age\",\n",
"    hue=\"cluster_predicted\",\n",
"    order=combinedDf[\"Age\"].value_counts().index,\n",
"    ax=ax,\n",
")\n",
"ax.set_title(\"Count by Age and predicted cluster\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1080x360 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Profession distribution per cluster; bars follow the order of\n",
"# combinedDf['Profession'].value_counts().\n",
"fig, ax = plt.subplots(figsize=(15, 5))\n",
"sns.countplot(\n",
"    data=combinedDf,\n",
"    x=\"Profession\",\n",
"    hue=\"cluster_predicted\",\n",
"    order=combinedDf[\"Profession\"].value_counts().index,\n",
"    ax=ax,\n",
")\n",
"ax.set_title(\"Count by Profession and predicted cluster\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment