Created
November 16, 2020 04:46
-
-
Save projjal1/a5a941dc664f292bcf641bebc4e82edd to your computer and use it in GitHub Desktop.
Wine Class Prediction.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Wine Class Prediction.ipynb", | |
"provenance": [], | |
"authorship_tag": "ABX9TyOYWOhy9JCKZKvDLDEtnark", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/projjal1/a5a941dc664f292bcf641bebc4e82edd/wine-class-prediction.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "AlPpQ6_dLpW1" | |
}, | |
"source": [ | |
"Importing all the required modules for dataset analysis" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "vKRxyyIxG_JS" | |
}, | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"from sklearn.cluster import KMeans\n", | |
"from sklearn.preprocessing import StandardScaler" | |
], | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "7Ue0P3B2LvJ2" | |
}, | |
"source": [ | |
"Setting the URL of the dataset hosted in a GitHub repo" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "NWUUg3HXHrvU" | |
}, | |
"source": [ | |
"file_path='https://raw.githubusercontent.com/projjal1/datasets/master/wine-k-means.csv'" | |
], | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "yzx_FgoYLzc0" | |
}, | |
"source": [ | |
"Loading the dataset and looking at the first few rows" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "rxA6VwloHtzZ", | |
"outputId": "393afb1f-fa1c-46b6-fa7d-1e61347cbddb", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 212 | |
} | |
}, | |
"source": [ | |
"data=pd.read_csv(file_path)\n", | |
"data.head()" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Class</th>\n", | |
" <th>Alcohol</th>\n", | |
" <th>Malic Acid</th>\n", | |
" <th>Ash</th>\n", | |
" <th>Alkalinity of Ash</th>\n", | |
" <th>Magnesium</th>\n", | |
" <th>Total Phenols</th>\n", | |
" <th>Flavanoids</th>\n", | |
" <th>Non-flavanoid phenols</th>\n", | |
" <th>Proanthocyanins</th>\n", | |
" <th>Color Intensity</th>\n", | |
" <th>Hue</th>\n", | |
" <th>OD280/OD315 of diluted wines</th>\n", | |
" <th>Proline</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>14.23</td>\n", | |
" <td>1.71</td>\n", | |
" <td>2.43</td>\n", | |
" <td>15.6</td>\n", | |
" <td>127</td>\n", | |
" <td>2.80</td>\n", | |
" <td>3.06</td>\n", | |
" <td>0.28</td>\n", | |
" <td>2.29</td>\n", | |
" <td>5.64</td>\n", | |
" <td>1.04</td>\n", | |
" <td>3.92</td>\n", | |
" <td>1065</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>13.20</td>\n", | |
" <td>1.78</td>\n", | |
" <td>2.14</td>\n", | |
" <td>11.2</td>\n", | |
" <td>100</td>\n", | |
" <td>2.65</td>\n", | |
" <td>2.76</td>\n", | |
" <td>0.26</td>\n", | |
" <td>1.28</td>\n", | |
" <td>4.38</td>\n", | |
" <td>1.05</td>\n", | |
" <td>3.40</td>\n", | |
" <td>1050</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>13.16</td>\n", | |
" <td>2.36</td>\n", | |
" <td>2.67</td>\n", | |
" <td>18.6</td>\n", | |
" <td>101</td>\n", | |
" <td>2.80</td>\n", | |
" <td>3.24</td>\n", | |
" <td>0.30</td>\n", | |
" <td>2.81</td>\n", | |
" <td>5.68</td>\n", | |
" <td>1.03</td>\n", | |
" <td>3.17</td>\n", | |
" <td>1185</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>14.37</td>\n", | |
" <td>1.95</td>\n", | |
" <td>2.50</td>\n", | |
" <td>16.8</td>\n", | |
" <td>113</td>\n", | |
" <td>3.85</td>\n", | |
" <td>3.49</td>\n", | |
" <td>0.24</td>\n", | |
" <td>2.18</td>\n", | |
" <td>7.80</td>\n", | |
" <td>0.86</td>\n", | |
" <td>3.45</td>\n", | |
" <td>1480</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>13.24</td>\n", | |
" <td>2.59</td>\n", | |
" <td>2.87</td>\n", | |
" <td>21.0</td>\n", | |
" <td>118</td>\n", | |
" <td>2.80</td>\n", | |
" <td>2.69</td>\n", | |
" <td>0.39</td>\n", | |
" <td>1.82</td>\n", | |
" <td>4.32</td>\n", | |
" <td>1.04</td>\n", | |
" <td>2.93</td>\n", | |
" <td>735</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Class Alcohol Malic Acid ... Hue OD280/OD315 of diluted wines Proline\n", | |
"0 1 14.23 1.71 ... 1.04 3.92 1065\n", | |
"1 1 13.20 1.78 ... 1.05 3.40 1050\n", | |
"2 1 13.16 2.36 ... 1.03 3.17 1185\n", | |
"3 1 14.37 1.95 ... 0.86 3.45 1480\n", | |
"4 1 13.24 2.59 ... 1.04 2.93 735\n", | |
"\n", | |
"[5 rows x 14 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 3 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "-BRyRO2sL4-p" | |
}, | |
"source": [ | |
"Shuffling the dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "VKJ382DGH-ie", | |
"outputId": "7af5810a-5b68-4a5c-c242-ac2c9d72cc70", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 212 | |
} | |
}, | |
"source": [ | |
"df=data.sample(frac=1).reset_index(drop=True)\n", | |
"df.head()" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Class</th>\n", | |
" <th>Alcohol</th>\n", | |
" <th>Malic Acid</th>\n", | |
" <th>Ash</th>\n", | |
" <th>Alkalinity of Ash</th>\n", | |
" <th>Magnesium</th>\n", | |
" <th>Total Phenols</th>\n", | |
" <th>Flavanoids</th>\n", | |
" <th>Non-flavanoid phenols</th>\n", | |
" <th>Proanthocyanins</th>\n", | |
" <th>Color Intensity</th>\n", | |
" <th>Hue</th>\n", | |
" <th>OD280/OD315 of diluted wines</th>\n", | |
" <th>Proline</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2</td>\n", | |
" <td>11.81</td>\n", | |
" <td>2.12</td>\n", | |
" <td>2.74</td>\n", | |
" <td>21.5</td>\n", | |
" <td>134</td>\n", | |
" <td>1.60</td>\n", | |
" <td>0.99</td>\n", | |
" <td>0.14</td>\n", | |
" <td>1.56</td>\n", | |
" <td>2.50</td>\n", | |
" <td>0.95</td>\n", | |
" <td>2.26</td>\n", | |
" <td>625</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>13.05</td>\n", | |
" <td>2.05</td>\n", | |
" <td>3.22</td>\n", | |
" <td>25.0</td>\n", | |
" <td>124</td>\n", | |
" <td>2.63</td>\n", | |
" <td>2.68</td>\n", | |
" <td>0.47</td>\n", | |
" <td>1.92</td>\n", | |
" <td>3.58</td>\n", | |
" <td>1.13</td>\n", | |
" <td>3.20</td>\n", | |
" <td>830</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>13.05</td>\n", | |
" <td>1.77</td>\n", | |
" <td>2.10</td>\n", | |
" <td>17.0</td>\n", | |
" <td>107</td>\n", | |
" <td>3.00</td>\n", | |
" <td>3.00</td>\n", | |
" <td>0.28</td>\n", | |
" <td>2.03</td>\n", | |
" <td>5.04</td>\n", | |
" <td>0.88</td>\n", | |
" <td>3.35</td>\n", | |
" <td>885</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2</td>\n", | |
" <td>11.61</td>\n", | |
" <td>1.35</td>\n", | |
" <td>2.70</td>\n", | |
" <td>20.0</td>\n", | |
" <td>94</td>\n", | |
" <td>2.74</td>\n", | |
" <td>2.92</td>\n", | |
" <td>0.29</td>\n", | |
" <td>2.49</td>\n", | |
" <td>2.65</td>\n", | |
" <td>0.96</td>\n", | |
" <td>3.26</td>\n", | |
" <td>680</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>14.38</td>\n", | |
" <td>3.59</td>\n", | |
" <td>2.28</td>\n", | |
" <td>16.0</td>\n", | |
" <td>102</td>\n", | |
" <td>3.25</td>\n", | |
" <td>3.17</td>\n", | |
" <td>0.27</td>\n", | |
" <td>2.19</td>\n", | |
" <td>4.90</td>\n", | |
" <td>1.04</td>\n", | |
" <td>3.44</td>\n", | |
" <td>1065</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Class Alcohol Malic Acid ... Hue OD280/OD315 of diluted wines Proline\n", | |
"0 2 11.81 2.12 ... 0.95 2.26 625\n", | |
"1 1 13.05 2.05 ... 1.13 3.20 830\n", | |
"2 1 13.05 1.77 ... 0.88 3.35 885\n", | |
"3 2 11.61 1.35 ... 0.96 3.26 680\n", | |
"4 1 14.38 3.59 ... 1.04 3.44 1065\n", | |
"\n", | |
"[5 rows x 14 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "jeGo2YBaJ8Ev", | |
"outputId": "77383776-3ef8-4394-9daa-9eb299fee086", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"source": [ | |
"df.shape" | |
], | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(178, 14)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 5 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "qjMaNXJGL_OG" | |
}, | |
"source": [ | |
"Extracting labels and values from the dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "rRlqbAmVISUE" | |
}, | |
"source": [ | |
"params=df.values\n", | |
"label=params[:,0]\n", | |
"values=params[:,1:]" | |
], | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Egq-COo5JIcK", | |
"outputId": "023a08b9-c784-4c86-eb8d-6c5a1121fbd1", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"source": [ | |
"print(values[:3])\n", | |
"print(label[:3])" | |
], | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[[1.181e+01 2.120e+00 2.740e+00 2.150e+01 1.340e+02 1.600e+00 9.900e-01\n", | |
" 1.400e-01 1.560e+00 2.500e+00 9.500e-01 2.260e+00 6.250e+02]\n", | |
" [1.305e+01 2.050e+00 3.220e+00 2.500e+01 1.240e+02 2.630e+00 2.680e+00\n", | |
" 4.700e-01 1.920e+00 3.580e+00 1.130e+00 3.200e+00 8.300e+02]\n", | |
" [1.305e+01 1.770e+00 2.100e+00 1.700e+01 1.070e+02 3.000e+00 3.000e+00\n", | |
" 2.800e-01 2.030e+00 5.040e+00 8.800e-01 3.350e+00 8.850e+02]]\n", | |
"[2. 1. 1.]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "AoHgtju7MCfk" | |
}, | |
"source": [ | |
"Standardizing the feature values with StandardScaler (imported in the first code cell)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "cmqkK0NpJTit", | |
"outputId": "bf7b2554-19d9-4ea2-df34-0669769c0a41", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"source": [ | |
"scaler=StandardScaler()\n", | |
"scaler.fit(values)\n", | |
"norm_values=scaler.transform(values)\n", | |
"print(norm_values[:2])" | |
], | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[[-1.47072867 -0.19420791 1.36520822 0.60208828 2.4053986 -1.11380046\n", | |
" -1.04339216 -1.78765596 -0.05413743 -1.1065529 -0.03268321 -0.49673551\n", | |
" -0.38816832]\n", | |
" [ 0.06099988 -0.25704433 3.11977186 1.65308575 1.7032652 0.53660103\n", | |
" 0.6533116 0.87142004 0.57661286 -0.63937732 0.75703776 0.83096074\n", | |
" 0.2646529 ]]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "QiYb1JapMJPt" | |
}, | |
"source": [ | |
"Splitting the dataset into test and train data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "NQAthTEgKIjf" | |
}, | |
"source": [ | |
"from sklearn.model_selection import train_test_split\n", | |
"X_train,X_test,y_train,y_test=train_test_split(norm_values,label,test_size=0.33, random_state=42)" | |
], | |
"execution_count": 51, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "qwDCOxirMNI4" | |
}, | |
"source": [ | |
"Creating the KMeans clustering model and fitting it on the training data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "o-jTJp7IJv9w", | |
"outputId": "94e47a8d-c8e7-4afe-c0c6-3310a939a016", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"source": [ | |
"# KMeans is unsupervised: fit() ignores any y argument, so only the features are passed.\n", | |
"model=KMeans(max_iter=1000,n_clusters=5)\n", | |
"model.fit(X_train)" | |
], | |
"execution_count": 61, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=1000,\n", | |
" n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',\n", | |
" random_state=None, tol=0.0001, verbose=0)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 61 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "fMzG9CO1MSUc" | |
}, | |
"source": [ | |
"Perform predictions on the test data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Bl9pgTUcJ5wM" | |
}, | |
"source": [ | |
"predictions=model.predict(X_test)" | |
], | |
"execution_count": 62, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "epKZF80FMWMb" | |
}, | |
"source": [ | |
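"Looking at the accuracy score metric. Yoohoo! We got 93%. Note that KMeans cluster IDs are arbitrary, so accuracy against the class labels is only meaningful once each cluster ID has been matched to a wine class; this figure comes from one particular run and may differ on re-execution because the shuffle and the KMeans initialisation are not seeded." | |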
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "282gUE6kKq3Z", | |
"outputId": "d95ffe7b-b953-4d69-d011-32f009122a50", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"source": [ | |
"from sklearn.metrics import accuracy_score\n", | |
"\n", | |
"# KMeans cluster IDs (0..n_clusters-1) are arbitrary and unrelated to the values in the\n", | |
"# Class column, so comparing them directly with y_test is not a meaningful accuracy.\n", | |
"# Map every cluster to the majority class of the training samples assigned to it.\n", | |
"cluster_to_class={}\n", | |
"for cluster_id in np.unique(model.labels_):\n", | |
"    classes,counts=np.unique(y_train[model.labels_==cluster_id],return_counts=True)\n", | |
"    cluster_to_class[cluster_id]=classes[counts.argmax()]\n", | |
"\n", | |
"# -1 is a sentinel for a cluster that received no training samples\n", | |
"# (assumes no wine class is labelled -1 - TODO confirm against the dataset).\n", | |
"mapped_predictions=np.array([cluster_to_class.get(cluster_id,-1) for cluster_id in predictions])\n", | |
"score=accuracy_score(y_test,mapped_predictions)\n", | |
"print(score)" | |
], | |
"execution_count": 63, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"0.9322033898305084\n" | |
], | |
"name": "stdout" | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment