Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save NataliiaRastoropova/9a94405133b05f9b470d635fd8defccc to your computer and use it in GitHub Desktop.
Save NataliiaRastoropova/9a94405133b05f9b470d635fd8defccc to your computer and use it in GitHub Desktop.
medium blog-post
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
"from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score\n",
"from sklearn.svm import SVC\n",
"from sklearn.linear_model import SGDClassifier\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n",
"from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, mean_absolute_error\n",
"from pprint import pprint\n",
"import plotly.graph_objs as go\n",
"from plotly.offline import iplot\n",
"from pandas.plotting import scatter_matrix\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib.colors import ListedColormap\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.linear_model import LogisticRegression\n",
"import seaborn as sns\n",
"from sklearn import model_selection\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- _Load the red wine data set from the UCI ML website (read here from the local file `winequality-red.csv`)._"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.width', 1000)\n",
"pd.set_option('display.max_columns',20)\n",
"\n",
"df = pd.read_csv('winequality-red.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- _Let's take a first look at the data set_"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'df' is not defined",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-2-6fae94b3c442>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"
],
"output_type": "error"
}
],
"source": [
"df.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- _Let's check the column types and the number of non-null values in each column_"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 1599 entries, 0 to 1598\n",
"Data columns (total 12 columns):\n",
"fixed acidity 1599 non-null float64\n",
"volatile acidity 1599 non-null float64\n",
"citric acid 1599 non-null float64\n",
"residual sugar 1599 non-null float64\n",
"chlorides 1599 non-null float64\n",
"free sulfur dioxide 1599 non-null float64\n",
"total sulfur dioxide 1599 non-null float64\n",
"density 1599 non-null float64\n",
"pH 1599 non-null float64\n",
"sulphates 1599 non-null float64\n",
"alcohol 1599 non-null float64\n",
"quality 1599 non-null int64\n",
"dtypes: float64(11), int64(1)\n",
"memory usage: 150.0 KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- _Let’s have a look at data dimensionality, feature names, and feature types (1599 rows, 12 columns)._"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1599, 12)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- _Let’s print out the column names_"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality'], dtype='object')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- _The describe method shows basic statistical characteristics of each numerical feature_"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.319637</td>\n",
" <td>0.527821</td>\n",
" <td>0.270976</td>\n",
" <td>2.538806</td>\n",
" <td>0.087467</td>\n",
" <td>15.874922</td>\n",
" <td>46.467792</td>\n",
" <td>0.996747</td>\n",
" <td>3.311113</td>\n",
" <td>0.658149</td>\n",
" <td>10.422983</td>\n",
" <td>5.636023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.741096</td>\n",
" <td>0.179060</td>\n",
" <td>0.194801</td>\n",
" <td>1.409928</td>\n",
" <td>0.047065</td>\n",
" <td>10.460157</td>\n",
" <td>32.895324</td>\n",
" <td>0.001887</td>\n",
" <td>0.154386</td>\n",
" <td>0.169507</td>\n",
" <td>1.065668</td>\n",
" <td>0.807569</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.600000</td>\n",
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>0.900000</td>\n",
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.990070</td>\n",
" <td>2.740000</td>\n",
" <td>0.330000</td>\n",
" <td>8.400000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.390000</td>\n",
" <td>0.090000</td>\n",
" <td>1.900000</td>\n",
" <td>0.070000</td>\n",
" <td>7.000000</td>\n",
" <td>22.000000</td>\n",
" <td>0.995600</td>\n",
" <td>3.210000</td>\n",
" <td>0.550000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.260000</td>\n",
" <td>2.200000</td>\n",
" <td>0.079000</td>\n",
" <td>14.000000</td>\n",
" <td>38.000000</td>\n",
" <td>0.996750</td>\n",
" <td>3.310000</td>\n",
" <td>0.620000</td>\n",
" <td>10.200000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.200000</td>\n",
" <td>0.640000</td>\n",
" <td>0.420000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>21.000000</td>\n",
" <td>62.000000</td>\n",
" <td>0.997835</td>\n",
" <td>3.400000</td>\n",
" <td>0.730000</td>\n",
" <td>11.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.900000</td>\n",
" <td>1.580000</td>\n",
" <td>1.000000</td>\n",
" <td>15.500000</td>\n",
" <td>0.611000</td>\n",
" <td>72.000000</td>\n",
" <td>289.000000</td>\n",
" <td>1.003690</td>\n",
" <td>4.010000</td>\n",
" <td>2.000000</td>\n",
" <td>14.900000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality\n",
"count 1599.000000 1599.000000 1599.000000 1599.000000 1599.000000 1599.000000 1599.000000 1599.000000 1599.000000 1599.000000 1599.000000 1599.000000\n",
"mean 8.319637 0.527821 0.270976 2.538806 0.087467 15.874922 46.467792 0.996747 3.311113 0.658149 10.422983 5.636023\n",
"std 1.741096 0.179060 0.194801 1.409928 0.047065 10.460157 32.895324 0.001887 0.154386 0.169507 1.065668 0.807569\n",
"min 4.600000 0.120000 0.000000 0.900000 0.012000 1.000000 6.000000 0.990070 2.740000 0.330000 8.400000 3.000000\n",
"25% 7.100000 0.390000 0.090000 1.900000 0.070000 7.000000 22.000000 0.995600 3.210000 0.550000 9.500000 5.000000\n",
"50% 7.900000 0.520000 0.260000 2.200000 0.079000 14.000000 38.000000 0.996750 3.310000 0.620000 10.200000 6.000000\n",
"75% 9.200000 0.640000 0.420000 2.600000 0.090000 21.000000 62.000000 0.997835 3.400000 0.730000 11.100000 6.000000\n",
"max 15.900000 1.580000 1.000000 15.500000 0.611000 72.000000 289.000000 1.003690 4.010000 2.000000 14.900000 8.000000"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- _Let's sort the wines by quality, best first, and look at the top 100 rows_"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>495</th>\n",
" <td>10.7</td>\n",
" <td>0.350</td>\n",
" <td>0.53</td>\n",
" <td>2.60</td>\n",
" <td>0.070</td>\n",
" <td>5.0</td>\n",
" <td>16.0</td>\n",
" <td>0.99720</td>\n",
" <td>3.15</td>\n",
" <td>0.65</td>\n",
" <td>11.0</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1403</th>\n",
" <td>7.2</td>\n",
" <td>0.330</td>\n",
" <td>0.33</td>\n",
" <td>1.70</td>\n",
" <td>0.061</td>\n",
" <td>3.0</td>\n",
" <td>13.0</td>\n",
" <td>0.99600</td>\n",
" <td>3.23</td>\n",
" <td>1.10</td>\n",
" <td>10.0</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>390</th>\n",
" <td>5.6</td>\n",
" <td>0.850</td>\n",
" <td>0.05</td>\n",
" <td>1.40</td>\n",
" <td>0.045</td>\n",
" <td>12.0</td>\n",
" <td>88.0</td>\n",
" <td>0.99240</td>\n",
" <td>3.56</td>\n",
" <td>0.82</td>\n",
" <td>12.9</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1061</th>\n",
" <td>9.1</td>\n",
" <td>0.400</td>\n",
" <td>0.50</td>\n",
" <td>1.80</td>\n",
" <td>0.071</td>\n",
" <td>7.0</td>\n",
" <td>16.0</td>\n",
" <td>0.99462</td>\n",
" <td>3.21</td>\n",
" <td>0.69</td>\n",
" <td>12.5</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1202</th>\n",
" <td>8.6</td>\n",
" <td>0.420</td>\n",
" <td>0.39</td>\n",
" <td>1.80</td>\n",
" <td>0.068</td>\n",
" <td>6.0</td>\n",
" <td>12.0</td>\n",
" <td>0.99516</td>\n",
" <td>3.35</td>\n",
" <td>0.69</td>\n",
" <td>11.7</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>828</th>\n",
" <td>7.8</td>\n",
" <td>0.570</td>\n",
" <td>0.09</td>\n",
" <td>2.30</td>\n",
" <td>0.065</td>\n",
" <td>34.0</td>\n",
" <td>45.0</td>\n",
" <td>0.99417</td>\n",
" <td>3.46</td>\n",
" <td>0.74</td>\n",
" <td>12.7</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>481</th>\n",
" <td>9.4</td>\n",
" <td>0.300</td>\n",
" <td>0.56</td>\n",
" <td>2.80</td>\n",
" <td>0.080</td>\n",
" <td>6.0</td>\n",
" <td>17.0</td>\n",
" <td>0.99640</td>\n",
" <td>3.15</td>\n",
" <td>0.92</td>\n",
" <td>11.7</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>455</th>\n",
" <td>11.3</td>\n",
" <td>0.620</td>\n",
" <td>0.67</td>\n",
" <td>5.20</td>\n",
" <td>0.086</td>\n",
" <td>6.0</td>\n",
" <td>19.0</td>\n",
" <td>0.99880</td>\n",
" <td>3.22</td>\n",
" <td>0.69</td>\n",
" <td>13.4</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1449</th>\n",
" <td>7.2</td>\n",
" <td>0.380</td>\n",
" <td>0.31</td>\n",
" <td>2.00</td>\n",
" <td>0.056</td>\n",
" <td>15.0</td>\n",
" <td>29.0</td>\n",
" <td>0.99472</td>\n",
" <td>3.23</td>\n",
" <td>0.76</td>\n",
" <td>11.3</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>440</th>\n",
" <td>12.6</td>\n",
" <td>0.310</td>\n",
" <td>0.72</td>\n",
" <td>2.20</td>\n",
" <td>0.072</td>\n",
" <td>6.0</td>\n",
" <td>29.0</td>\n",
" <td>0.99870</td>\n",
" <td>2.88</td>\n",
" <td>0.82</td>\n",
" <td>9.8</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1549</th>\n",
" <td>7.4</td>\n",
" <td>0.360</td>\n",
" <td>0.30</td>\n",
" <td>1.80</td>\n",
" <td>0.074</td>\n",
" <td>17.0</td>\n",
" <td>24.0</td>\n",
" <td>0.99419</td>\n",
" <td>3.24</td>\n",
" <td>0.70</td>\n",
" <td>11.4</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>498</th>\n",
" <td>10.7</td>\n",
" <td>0.350</td>\n",
" <td>0.53</td>\n",
" <td>2.60</td>\n",
" <td>0.070</td>\n",
" <td>5.0</td>\n",
" <td>16.0</td>\n",
" <td>0.99720</td>\n",
" <td>3.15</td>\n",
" <td>0.65</td>\n",
" <td>11.0</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>267</th>\n",
" <td>7.9</td>\n",
" <td>0.350</td>\n",
" <td>0.46</td>\n",
" <td>3.60</td>\n",
" <td>0.078</td>\n",
" <td>15.0</td>\n",
" <td>37.0</td>\n",
" <td>0.99730</td>\n",
" <td>3.35</td>\n",
" <td>0.86</td>\n",
" <td>12.8</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1269</th>\n",
" <td>5.5</td>\n",
" <td>0.490</td>\n",
" <td>0.03</td>\n",
" <td>1.80</td>\n",
" <td>0.044</td>\n",
" <td>28.0</td>\n",
" <td>87.0</td>\n",
" <td>0.99080</td>\n",
" <td>3.50</td>\n",
" <td>0.82</td>\n",
" <td>14.0</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>278</th>\n",
" <td>10.3</td>\n",
" <td>0.320</td>\n",
" <td>0.45</td>\n",
" <td>6.40</td>\n",
" <td>0.073</td>\n",
" <td>5.0</td>\n",
" <td>13.0</td>\n",
" <td>0.99760</td>\n",
" <td>3.23</td>\n",
" <td>0.82</td>\n",
" <td>12.6</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1120</th>\n",
" <td>7.9</td>\n",
" <td>0.540</td>\n",
" <td>0.34</td>\n",
" <td>2.50</td>\n",
" <td>0.076</td>\n",
" <td>8.0</td>\n",
" <td>17.0</td>\n",
" <td>0.99235</td>\n",
" <td>3.20</td>\n",
" <td>0.72</td>\n",
" <td>13.1</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>588</th>\n",
" <td>5.0</td>\n",
" <td>0.420</td>\n",
" <td>0.24</td>\n",
" <td>2.00</td>\n",
" <td>0.060</td>\n",
" <td>19.0</td>\n",
" <td>50.0</td>\n",
" <td>0.99170</td>\n",
" <td>3.72</td>\n",
" <td>0.74</td>\n",
" <td>14.0</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1090</th>\n",
" <td>10.0</td>\n",
" <td>0.260</td>\n",
" <td>0.54</td>\n",
" <td>1.90</td>\n",
" <td>0.083</td>\n",
" <td>42.0</td>\n",
" <td>74.0</td>\n",
" <td>0.99451</td>\n",
" <td>2.98</td>\n",
" <td>0.63</td>\n",
" <td>11.8</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>488</th>\n",
" <td>11.6</td>\n",
" <td>0.320</td>\n",
" <td>0.55</td>\n",
" <td>2.80</td>\n",
" <td>0.081</td>\n",
" <td>35.0</td>\n",
" <td>67.0</td>\n",
" <td>1.00020</td>\n",
" <td>3.32</td>\n",
" <td>0.92</td>\n",
" <td>10.8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>491</th>\n",
" <td>9.2</td>\n",
" <td>0.410</td>\n",
" <td>0.50</td>\n",
" <td>2.50</td>\n",
" <td>0.055</td>\n",
" <td>12.0</td>\n",
" <td>25.0</td>\n",
" <td>0.99520</td>\n",
" <td>3.34</td>\n",
" <td>0.79</td>\n",
" <td>13.3</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>996</th>\n",
" <td>5.6</td>\n",
" <td>0.660</td>\n",
" <td>0.00</td>\n",
" <td>2.20</td>\n",
" <td>0.087</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>0.99378</td>\n",
" <td>3.71</td>\n",
" <td>0.63</td>\n",
" <td>12.8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1132</th>\n",
" <td>7.4</td>\n",
" <td>0.360</td>\n",
" <td>0.34</td>\n",
" <td>1.80</td>\n",
" <td>0.075</td>\n",
" <td>18.0</td>\n",
" <td>38.0</td>\n",
" <td>0.99330</td>\n",
" <td>3.38</td>\n",
" <td>0.88</td>\n",
" <td>13.6</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492</th>\n",
" <td>8.9</td>\n",
" <td>0.400</td>\n",
" <td>0.51</td>\n",
" <td>2.60</td>\n",
" <td>0.052</td>\n",
" <td>13.0</td>\n",
" <td>27.0</td>\n",
" <td>0.99500</td>\n",
" <td>3.32</td>\n",
" <td>0.90</td>\n",
" <td>13.4</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>357</th>\n",
" <td>10.5</td>\n",
" <td>0.420</td>\n",
" <td>0.66</td>\n",
" <td>2.95</td>\n",
" <td>0.116</td>\n",
" <td>12.0</td>\n",
" <td>29.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.24</td>\n",
" <td>0.75</td>\n",
" <td>11.7</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1133</th>\n",
" <td>7.2</td>\n",
" <td>0.480</td>\n",
" <td>0.07</td>\n",
" <td>5.50</td>\n",
" <td>0.089</td>\n",
" <td>10.0</td>\n",
" <td>18.0</td>\n",
" <td>0.99684</td>\n",
" <td>3.37</td>\n",
" <td>0.68</td>\n",
" <td>11.2</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1134</th>\n",
" <td>8.5</td>\n",
" <td>0.280</td>\n",
" <td>0.35</td>\n",
" <td>1.70</td>\n",
" <td>0.061</td>\n",
" <td>6.0</td>\n",
" <td>15.0</td>\n",
" <td>0.99524</td>\n",
" <td>3.30</td>\n",
" <td>0.74</td>\n",
" <td>11.8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>896</th>\n",
" <td>8.3</td>\n",
" <td>0.310</td>\n",
" <td>0.39</td>\n",
" <td>2.40</td>\n",
" <td>0.078</td>\n",
" <td>17.0</td>\n",
" <td>43.0</td>\n",
" <td>0.99444</td>\n",
" <td>3.31</td>\n",
" <td>0.77</td>\n",
" <td>12.5</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>887</th>\n",
" <td>10.7</td>\n",
" <td>0.520</td>\n",
" <td>0.38</td>\n",
" <td>2.60</td>\n",
" <td>0.066</td>\n",
" <td>29.0</td>\n",
" <td>56.0</td>\n",
" <td>0.99577</td>\n",
" <td>3.15</td>\n",
" <td>0.79</td>\n",
" <td>12.1</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>901</th>\n",
" <td>7.4</td>\n",
" <td>0.635</td>\n",
" <td>0.10</td>\n",
" <td>2.40</td>\n",
" <td>0.080</td>\n",
" <td>16.0</td>\n",
" <td>33.0</td>\n",
" <td>0.99736</td>\n",
" <td>3.58</td>\n",
" <td>0.69</td>\n",
" <td>10.8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>902</th>\n",
" <td>7.4</td>\n",
" <td>0.635</td>\n",
" <td>0.10</td>\n",
" <td>2.40</td>\n",
" <td>0.080</td>\n",
" <td>16.0</td>\n",
" <td>33.0</td>\n",
" <td>0.99736</td>\n",
" <td>3.58</td>\n",
" <td>0.69</td>\n",
" <td>10.8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>948</th>\n",
" <td>8.9</td>\n",
" <td>0.120</td>\n",
" <td>0.45</td>\n",
" <td>1.80</td>\n",
" <td>0.075</td>\n",
" <td>10.0</td>\n",
" <td>21.0</td>\n",
" <td>0.99552</td>\n",
" <td>3.41</td>\n",
" <td>0.76</td>\n",
" <td>11.9</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>949</th>\n",
" <td>8.9</td>\n",
" <td>0.120</td>\n",
" <td>0.45</td>\n",
" <td>1.80</td>\n",
" <td>0.075</td>\n",
" <td>10.0</td>\n",
" <td>21.0</td>\n",
" <td>0.99552</td>\n",
" <td>3.41</td>\n",
" <td>0.76</td>\n",
" <td>11.9</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>950</th>\n",
" <td>8.9</td>\n",
" <td>0.120</td>\n",
" <td>0.45</td>\n",
" <td>1.80</td>\n",
" <td>0.075</td>\n",
" <td>10.0</td>\n",
" <td>21.0</td>\n",
" <td>0.99552</td>\n",
" <td>3.41</td>\n",
" <td>0.76</td>\n",
" <td>11.9</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>951</th>\n",
" <td>8.3</td>\n",
" <td>0.280</td>\n",
" <td>0.48</td>\n",
" <td>2.10</td>\n",
" <td>0.093</td>\n",
" <td>6.0</td>\n",
" <td>12.0</td>\n",
" <td>0.99408</td>\n",
" <td>3.26</td>\n",
" <td>0.62</td>\n",
" <td>12.4</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>952</th>\n",
" <td>8.2</td>\n",
" <td>0.310</td>\n",
" <td>0.40</td>\n",
" <td>2.20</td>\n",
" <td>0.058</td>\n",
" <td>6.0</td>\n",
" <td>10.0</td>\n",
" <td>0.99536</td>\n",
" <td>3.31</td>\n",
" <td>0.68</td>\n",
" <td>11.2</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1192</th>\n",
" <td>7.2</td>\n",
" <td>0.250</td>\n",
" <td>0.37</td>\n",
" <td>2.50</td>\n",
" <td>0.063</td>\n",
" <td>11.0</td>\n",
" <td>41.0</td>\n",
" <td>0.99439</td>\n",
" <td>3.52</td>\n",
" <td>0.80</td>\n",
" <td>12.4</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>953</th>\n",
" <td>10.2</td>\n",
" <td>0.340</td>\n",
" <td>0.48</td>\n",
" <td>2.10</td>\n",
" <td>0.052</td>\n",
" <td>5.0</td>\n",
" <td>9.0</td>\n",
" <td>0.99458</td>\n",
" <td>3.20</td>\n",
" <td>0.69</td>\n",
" <td>12.1</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>230</th>\n",
" <td>5.2</td>\n",
" <td>0.480</td>\n",
" <td>0.04</td>\n",
" <td>1.60</td>\n",
" <td>0.054</td>\n",
" <td>19.0</td>\n",
" <td>106.0</td>\n",
" <td>0.99270</td>\n",
" <td>3.54</td>\n",
" <td>0.62</td>\n",
" <td>12.2</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1156</th>\n",
" <td>8.5</td>\n",
" <td>0.180</td>\n",
" <td>0.51</td>\n",
" <td>1.75</td>\n",
" <td>0.071</td>\n",
" <td>45.0</td>\n",
" <td>88.0</td>\n",
" <td>0.99524</td>\n",
" <td>3.33</td>\n",
" <td>0.76</td>\n",
" <td>11.8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>958</th>\n",
" <td>6.4</td>\n",
" <td>0.570</td>\n",
" <td>0.12</td>\n",
" <td>2.30</td>\n",
" <td>0.120</td>\n",
" <td>25.0</td>\n",
" <td>36.0</td>\n",
" <td>0.99519</td>\n",
" <td>3.47</td>\n",
" <td>0.71</td>\n",
" <td>11.3</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>430</th>\n",
" <td>10.5</td>\n",
" <td>0.240</td>\n",
" <td>0.47</td>\n",
" <td>2.10</td>\n",
" <td>0.066</td>\n",
" <td>6.0</td>\n",
" <td>24.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.15</td>\n",
" <td>0.90</td>\n",
" <td>11.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1177</th>\n",
" <td>7.1</td>\n",
" <td>0.660</td>\n",
" <td>0.00</td>\n",
" <td>2.40</td>\n",
" <td>0.052</td>\n",
" <td>6.0</td>\n",
" <td>11.0</td>\n",
" <td>0.99318</td>\n",
" <td>3.35</td>\n",
" <td>0.66</td>\n",
" <td>12.7</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1157</th>\n",
" <td>5.1</td>\n",
" <td>0.510</td>\n",
" <td>0.18</td>\n",
" <td>2.10</td>\n",
" <td>0.042</td>\n",
" <td>16.0</td>\n",
" <td>101.0</td>\n",
" <td>0.99240</td>\n",
" <td>3.46</td>\n",
" <td>0.87</td>\n",
" <td>12.9</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>420</th>\n",
" <td>9.5</td>\n",
" <td>0.560</td>\n",
" <td>0.33</td>\n",
" <td>2.40</td>\n",
" <td>0.089</td>\n",
" <td>35.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99720</td>\n",
" <td>3.28</td>\n",
" <td>0.73</td>\n",
" <td>11.8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1160</th>\n",
" <td>10.6</td>\n",
" <td>0.360</td>\n",
" <td>0.57</td>\n",
" <td>2.30</td>\n",
" <td>0.087</td>\n",
" <td>6.0</td>\n",
" <td>20.0</td>\n",
" <td>0.99676</td>\n",
" <td>3.14</td>\n",
" <td>0.72</td>\n",
" <td>11.1</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>421</th>\n",
" <td>6.6</td>\n",
" <td>0.840</td>\n",
" <td>0.03</td>\n",
" <td>2.30</td>\n",
" <td>0.059</td>\n",
" <td>32.0</td>\n",
" <td>48.0</td>\n",
" <td>0.99520</td>\n",
" <td>3.52</td>\n",
" <td>0.56</td>\n",
" <td>12.3</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>425</th>\n",
" <td>6.6</td>\n",
" <td>0.840</td>\n",
" <td>0.03</td>\n",
" <td>2.30</td>\n",
" <td>0.059</td>\n",
" <td>32.0</td>\n",
" <td>48.0</td>\n",
" <td>0.99520</td>\n",
" <td>3.52</td>\n",
" <td>0.56</td>\n",
" <td>12.3</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>966</th>\n",
" <td>9.0</td>\n",
" <td>0.380</td>\n",
" <td>0.41</td>\n",
" <td>2.40</td>\n",
" <td>0.103</td>\n",
" <td>6.0</td>\n",
" <td>10.0</td>\n",
" <td>0.99604</td>\n",
" <td>3.13</td>\n",
" <td>0.58</td>\n",
" <td>11.9</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>423</th>\n",
" <td>10.5</td>\n",
" <td>0.240</td>\n",
" <td>0.47</td>\n",
" <td>2.10</td>\n",
" <td>0.066</td>\n",
" <td>6.0</td>\n",
" <td>24.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.15</td>\n",
" <td>0.90</td>\n",
" <td>11.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1162</th>\n",
" <td>8.5</td>\n",
" <td>0.320</td>\n",
" <td>0.42</td>\n",
" <td>2.30</td>\n",
" <td>0.075</td>\n",
" <td>12.0</td>\n",
" <td>19.0</td>\n",
" <td>0.99434</td>\n",
" <td>3.14</td>\n",
" <td>0.71</td>\n",
" <td>11.8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>243</th>\n",
" <td>15.0</td>\n",
" <td>0.210</td>\n",
" <td>0.44</td>\n",
" <td>2.20</td>\n",
" <td>0.075</td>\n",
" <td>10.0</td>\n",
" <td>24.0</td>\n",
" <td>1.00005</td>\n",
" <td>3.07</td>\n",
" <td>0.84</td>\n",
" <td>9.2</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1206</th>\n",
" <td>7.2</td>\n",
" <td>0.360</td>\n",
" <td>0.46</td>\n",
" <td>2.10</td>\n",
" <td>0.074</td>\n",
" <td>24.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99534</td>\n",
" <td>3.40</td>\n",
" <td>0.85</td>\n",
" <td>11.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1209</th>\n",
" <td>6.2</td>\n",
" <td>0.390</td>\n",
" <td>0.43</td>\n",
" <td>2.00</td>\n",
" <td>0.071</td>\n",
" <td>14.0</td>\n",
" <td>24.0</td>\n",
" <td>0.99428</td>\n",
" <td>3.45</td>\n",
" <td>0.87</td>\n",
" <td>11.2</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>259</th>\n",
" <td>10.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>2.60</td>\n",
" <td>0.085</td>\n",
" <td>14.0</td>\n",
" <td>33.0</td>\n",
" <td>0.99965</td>\n",
" <td>3.36</td>\n",
" <td>0.80</td>\n",
" <td>10.5</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>443</th>\n",
" <td>10.0</td>\n",
" <td>0.440</td>\n",
" <td>0.49</td>\n",
" <td>2.70</td>\n",
" <td>0.077</td>\n",
" <td>11.0</td>\n",
" <td>19.0</td>\n",
" <td>0.99630</td>\n",
" <td>3.23</td>\n",
" <td>0.63</td>\n",
" <td>11.6</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1147</th>\n",
" <td>10.0</td>\n",
" <td>0.410</td>\n",
" <td>0.45</td>\n",
" <td>6.20</td>\n",
" <td>0.071</td>\n",
" <td>6.0</td>\n",
" <td>14.0</td>\n",
" <td>0.99702</td>\n",
" <td>3.21</td>\n",
" <td>0.49</td>\n",
" <td>11.8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1003</th>\n",
" <td>6.8</td>\n",
" <td>0.360</td>\n",
" <td>0.32</td>\n",
" <td>1.80</td>\n",
" <td>0.067</td>\n",
" <td>4.0</td>\n",
" <td>8.0</td>\n",
" <td>0.99280</td>\n",
" <td>3.36</td>\n",
" <td>0.55</td>\n",
" <td>12.8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>978</th>\n",
" <td>7.0</td>\n",
" <td>0.400</td>\n",
" <td>0.32</td>\n",
" <td>3.60</td>\n",
" <td>0.061</td>\n",
" <td>9.0</td>\n",
" <td>29.0</td>\n",
" <td>0.99416</td>\n",
" <td>3.28</td>\n",
" <td>0.49</td>\n",
" <td>11.3</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>929</th>\n",
" <td>8.7</td>\n",
" <td>0.330</td>\n",
" <td>0.38</td>\n",
" <td>3.30</td>\n",
" <td>0.063</td>\n",
" <td>10.0</td>\n",
" <td>19.0</td>\n",
" <td>0.99468</td>\n",
" <td>3.30</td>\n",
" <td>0.73</td>\n",
" <td>12.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>974</th>\n",
" <td>8.8</td>\n",
" <td>0.330</td>\n",
" <td>0.41</td>\n",
" <td>5.90</td>\n",
" <td>0.073</td>\n",
" <td>7.0</td>\n",
" <td>13.0</td>\n",
" <td>0.99658</td>\n",
" <td>3.30</td>\n",
" <td>0.62</td>\n",
" <td>12.1</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality\n",
"495 10.7 0.350 0.53 2.60 0.070 5.0 16.0 0.99720 3.15 0.65 11.0 8\n",
"1403 7.2 0.330 0.33 1.70 0.061 3.0 13.0 0.99600 3.23 1.10 10.0 8\n",
"390 5.6 0.850 0.05 1.40 0.045 12.0 88.0 0.99240 3.56 0.82 12.9 8\n",
"1061 9.1 0.400 0.50 1.80 0.071 7.0 16.0 0.99462 3.21 0.69 12.5 8\n",
"1202 8.6 0.420 0.39 1.80 0.068 6.0 12.0 0.99516 3.35 0.69 11.7 8\n",
"828 7.8 0.570 0.09 2.30 0.065 34.0 45.0 0.99417 3.46 0.74 12.7 8\n",
"481 9.4 0.300 0.56 2.80 0.080 6.0 17.0 0.99640 3.15 0.92 11.7 8\n",
"455 11.3 0.620 0.67 5.20 0.086 6.0 19.0 0.99880 3.22 0.69 13.4 8\n",
"1449 7.2 0.380 0.31 2.00 0.056 15.0 29.0 0.99472 3.23 0.76 11.3 8\n",
"440 12.6 0.310 0.72 2.20 0.072 6.0 29.0 0.99870 2.88 0.82 9.8 8\n",
"1549 7.4 0.360 0.30 1.80 0.074 17.0 24.0 0.99419 3.24 0.70 11.4 8\n",
"498 10.7 0.350 0.53 2.60 0.070 5.0 16.0 0.99720 3.15 0.65 11.0 8\n",
"267 7.9 0.350 0.46 3.60 0.078 15.0 37.0 0.99730 3.35 0.86 12.8 8\n",
"1269 5.5 0.490 0.03 1.80 0.044 28.0 87.0 0.99080 3.50 0.82 14.0 8\n",
"278 10.3 0.320 0.45 6.40 0.073 5.0 13.0 0.99760 3.23 0.82 12.6 8\n",
"1120 7.9 0.540 0.34 2.50 0.076 8.0 17.0 0.99235 3.20 0.72 13.1 8\n",
"588 5.0 0.420 0.24 2.00 0.060 19.0 50.0 0.99170 3.72 0.74 14.0 8\n",
"1090 10.0 0.260 0.54 1.90 0.083 42.0 74.0 0.99451 2.98 0.63 11.8 8\n",
"488 11.6 0.320 0.55 2.80 0.081 35.0 67.0 1.00020 3.32 0.92 10.8 7\n",
"491 9.2 0.410 0.50 2.50 0.055 12.0 25.0 0.99520 3.34 0.79 13.3 7\n",
"996 5.6 0.660 0.00 2.20 0.087 3.0 11.0 0.99378 3.71 0.63 12.8 7\n",
"1132 7.4 0.360 0.34 1.80 0.075 18.0 38.0 0.99330 3.38 0.88 13.6 7\n",
"492 8.9 0.400 0.51 2.60 0.052 13.0 27.0 0.99500 3.32 0.90 13.4 7\n",
"357 10.5 0.420 0.66 2.95 0.116 12.0 29.0 0.99700 3.24 0.75 11.7 7\n",
"1133 7.2 0.480 0.07 5.50 0.089 10.0 18.0 0.99684 3.37 0.68 11.2 7\n",
"1134 8.5 0.280 0.35 1.70 0.061 6.0 15.0 0.99524 3.30 0.74 11.8 7\n",
"896 8.3 0.310 0.39 2.40 0.078 17.0 43.0 0.99444 3.31 0.77 12.5 7\n",
"887 10.7 0.520 0.38 2.60 0.066 29.0 56.0 0.99577 3.15 0.79 12.1 7\n",
"901 7.4 0.635 0.10 2.40 0.080 16.0 33.0 0.99736 3.58 0.69 10.8 7\n",
"902 7.4 0.635 0.10 2.40 0.080 16.0 33.0 0.99736 3.58 0.69 10.8 7\n",
"... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"948 8.9 0.120 0.45 1.80 0.075 10.0 21.0 0.99552 3.41 0.76 11.9 7\n",
"949 8.9 0.120 0.45 1.80 0.075 10.0 21.0 0.99552 3.41 0.76 11.9 7\n",
"950 8.9 0.120 0.45 1.80 0.075 10.0 21.0 0.99552 3.41 0.76 11.9 7\n",
"951 8.3 0.280 0.48 2.10 0.093 6.0 12.0 0.99408 3.26 0.62 12.4 7\n",
"952 8.2 0.310 0.40 2.20 0.058 6.0 10.0 0.99536 3.31 0.68 11.2 7\n",
"1192 7.2 0.250 0.37 2.50 0.063 11.0 41.0 0.99439 3.52 0.80 12.4 7\n",
"953 10.2 0.340 0.48 2.10 0.052 5.0 9.0 0.99458 3.20 0.69 12.1 7\n",
"230 5.2 0.480 0.04 1.60 0.054 19.0 106.0 0.99270 3.54 0.62 12.2 7\n",
"1156 8.5 0.180 0.51 1.75 0.071 45.0 88.0 0.99524 3.33 0.76 11.8 7\n",
"958 6.4 0.570 0.12 2.30 0.120 25.0 36.0 0.99519 3.47 0.71 11.3 7\n",
"430 10.5 0.240 0.47 2.10 0.066 6.0 24.0 0.99780 3.15 0.90 11.0 7\n",
"1177 7.1 0.660 0.00 2.40 0.052 6.0 11.0 0.99318 3.35 0.66 12.7 7\n",
"1157 5.1 0.510 0.18 2.10 0.042 16.0 101.0 0.99240 3.46 0.87 12.9 7\n",
"420 9.5 0.560 0.33 2.40 0.089 35.0 67.0 0.99720 3.28 0.73 11.8 7\n",
"1160 10.6 0.360 0.57 2.30 0.087 6.0 20.0 0.99676 3.14 0.72 11.1 7\n",
"421 6.6 0.840 0.03 2.30 0.059 32.0 48.0 0.99520 3.52 0.56 12.3 7\n",
"425 6.6 0.840 0.03 2.30 0.059 32.0 48.0 0.99520 3.52 0.56 12.3 7\n",
"966 9.0 0.380 0.41 2.40 0.103 6.0 10.0 0.99604 3.13 0.58 11.9 7\n",
"423 10.5 0.240 0.47 2.10 0.066 6.0 24.0 0.99780 3.15 0.90 11.0 7\n",
"1162 8.5 0.320 0.42 2.30 0.075 12.0 19.0 0.99434 3.14 0.71 11.8 7\n",
"243 15.0 0.210 0.44 2.20 0.075 10.0 24.0 1.00005 3.07 0.84 9.2 7\n",
"1206 7.2 0.360 0.46 2.10 0.074 24.0 44.0 0.99534 3.40 0.85 11.0 7\n",
"1209 6.2 0.390 0.43 2.00 0.071 14.0 24.0 0.99428 3.45 0.87 11.2 7\n",
"259 10.0 0.310 0.47 2.60 0.085 14.0 33.0 0.99965 3.36 0.80 10.5 7\n",
"443 10.0 0.440 0.49 2.70 0.077 11.0 19.0 0.99630 3.23 0.63 11.6 7\n",
"1147 10.0 0.410 0.45 6.20 0.071 6.0 14.0 0.99702 3.21 0.49 11.8 7\n",
"1003 6.8 0.360 0.32 1.80 0.067 4.0 8.0 0.99280 3.36 0.55 12.8 7\n",
"978 7.0 0.400 0.32 3.60 0.061 9.0 29.0 0.99416 3.28 0.49 11.3 7\n",
"929 8.7 0.330 0.38 3.30 0.063 10.0 19.0 0.99468 3.30 0.73 12.0 7\n",
"974 8.8 0.330 0.41 5.90 0.073 7.0 13.0 0.99658 3.30 0.62 12.1 7\n",
"\n",
"[100 rows x 12 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted_df=df.sort_values(by='quality', ascending=False)\n",
"sorted_df.head(100)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- _Get mean for all colum with highest mark_"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"fixed acidity 8.566667\n",
"volatile acidity 0.423333\n",
"citric acid 0.391111\n",
"residual sugar 2.577778\n",
"chlorides 0.068444\n",
"free sulfur dioxide 13.277778\n",
"total sulfur dioxide 33.444444\n",
"density 0.995212\n",
"pH 3.267222\n",
"sulphates 0.767778\n",
"alcohol 12.094444\n",
"quality 8.000000\n",
"dtype: float64\n"
]
}
],
"source": [
"print(df[df['quality'] == 8].mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- _Create pivot\\_table_"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>alcohol</th>\n",
" <th>chlorides</th>\n",
" <th>citric acid</th>\n",
" <th>density</th>\n",
" <th>fixed acidity</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>pH</th>\n",
" <th>residual sugar</th>\n",
" <th>sulphates</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>volatile acidity</th>\n",
" </tr>\n",
" <tr>\n",
" <th>quality</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>9.925</td>\n",
" <td>0.0905</td>\n",
" <td>0.035</td>\n",
" <td>0.997565</td>\n",
" <td>7.50</td>\n",
" <td>6.0</td>\n",
" <td>3.39</td>\n",
" <td>2.1</td>\n",
" <td>0.545</td>\n",
" <td>15.0</td>\n",
" <td>0.845</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10.000</td>\n",
" <td>0.0800</td>\n",
" <td>0.090</td>\n",
" <td>0.996500</td>\n",
" <td>7.50</td>\n",
" <td>11.0</td>\n",
" <td>3.37</td>\n",
" <td>2.1</td>\n",
" <td>0.560</td>\n",
" <td>26.0</td>\n",
" <td>0.670</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>9.700</td>\n",
" <td>0.0810</td>\n",
" <td>0.230</td>\n",
" <td>0.997000</td>\n",
" <td>7.80</td>\n",
" <td>15.0</td>\n",
" <td>3.30</td>\n",
" <td>2.2</td>\n",
" <td>0.580</td>\n",
" <td>47.0</td>\n",
" <td>0.580</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>10.500</td>\n",
" <td>0.0780</td>\n",
" <td>0.260</td>\n",
" <td>0.996560</td>\n",
" <td>7.90</td>\n",
" <td>14.0</td>\n",
" <td>3.32</td>\n",
" <td>2.2</td>\n",
" <td>0.640</td>\n",
" <td>35.0</td>\n",
" <td>0.490</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>11.500</td>\n",
" <td>0.0730</td>\n",
" <td>0.400</td>\n",
" <td>0.995770</td>\n",
" <td>8.80</td>\n",
" <td>11.0</td>\n",
" <td>3.28</td>\n",
" <td>2.3</td>\n",
" <td>0.740</td>\n",
" <td>27.0</td>\n",
" <td>0.370</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>12.150</td>\n",
" <td>0.0705</td>\n",
" <td>0.420</td>\n",
" <td>0.994940</td>\n",
" <td>8.25</td>\n",
" <td>7.5</td>\n",
" <td>3.23</td>\n",
" <td>2.1</td>\n",
" <td>0.740</td>\n",
" <td>21.5</td>\n",
" <td>0.370</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" alcohol chlorides citric acid density fixed acidity free sulfur dioxide pH residual sugar sulphates total sulfur dioxide volatile acidity\n",
"quality \n",
"3 9.925 0.0905 0.035 0.997565 7.50 6.0 3.39 2.1 0.545 15.0 0.845\n",
"4 10.000 0.0800 0.090 0.996500 7.50 11.0 3.37 2.1 0.560 26.0 0.670\n",
"5 9.700 0.0810 0.230 0.997000 7.80 15.0 3.30 2.2 0.580 47.0 0.580\n",
"6 10.500 0.0780 0.260 0.996560 7.90 14.0 3.32 2.2 0.640 35.0 0.490\n",
"7 11.500 0.0730 0.400 0.995770 8.80 11.0 3.28 2.3 0.740 27.0 0.370\n",
"8 12.150 0.0705 0.420 0.994940 8.25 7.5 3.23 2.1 0.740 21.5 0.370"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"colum_names = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol']\n",
"df_pivot_table = df.pivot_table(colum_names,['quality'], aggfunc='median')\n",
"\n",
"df_pivot_table"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- _Corelation matrix_"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"quality 1.000000\n",
"alcohol 0.476166\n",
"sulphates 0.251397\n",
"citric acid 0.226373\n",
"fixed acidity 0.124052\n",
"residual sugar 0.013732\n",
"free sulfur dioxide -0.050656\n",
"pH -0.057731\n",
"chlorides -0.128907\n",
"density -0.174919\n",
"total sulfur dioxide -0.185100\n",
"volatile acidity -0.390558\n",
"Name: quality, dtype: float64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"corr_matrix = df.corr()\n",
"corr_matrix[\"quality\"].sort_values(ascending=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"***\n",
"## Data visualization\n",
"***"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- Data distribution"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig = plt.figure(figsize=(6, 5))\n",
"sns.barplot(x='quality', y='sulphates', data=df)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- Histograms"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x360 with 12 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"df.hist(bins=10,figsize=(6, 5))\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- Density Plots"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 12 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"df.plot(kind='density', subplots=True, layout=(4,3), sharex=False)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"sns.distplot(df['quality'], hist=True, kde=True,\n",
" bins='auto', color = 'darkblue',\n",
" hist_kws={'edgecolor':'black'},\n",
" kde_kws={'linewidth': 4})\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- Column names"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"colum_names = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"correlations = df.corr()\n",
"# Plot figsize\n",
"fig, ax = plt.subplots(figsize=(10, 10))\n",
"# Generate Color Map\n",
"colormap = sns.diverging_palette(220, 10, as_cmap=True)\n",
"# Generate Heat Map, allow annotations and place floats in map\n",
"sns.heatmap(correlations, cmap=colormap, annot=True, fmt=\".2f\")\n",
"ax.set_xticklabels(\n",
" colum_names,\n",
" rotation=45,\n",
" horizontalalignment='right'\n",
");\n",
"ax.set_yticklabels(colum_names);\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sm = scatter_matrix(df, figsize=(6, 6), diagonal='kde')\n",
"#Change label rotation\n",
"[s.xaxis.label.set_rotation(40) for s in sm.reshape(-1)]\n",
"[s.yaxis.label.set_rotation(0) for s in sm.reshape(-1)]\n",
"#May need to offset label when rotating to prevent overlap of figure\n",
"[s.get_yaxis().set_label_coords(-0.6,0.5) for s in sm.reshape(-1)]\n",
"#Hide all ticks\n",
"[s.set_xticks(()) for s in sm.reshape(-1)]\n",
"[s.set_yticks(()) for s in sm.reshape(-1)]\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"***\n",
"## Data Preprocessing\n",
"***"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"+ Making binary classificaion for the response variable."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Dividing wine as good and bad by giving the limit for the quality\n",
"bins = (2, 6, 8)\n",
"group_names = ['bad', 'good']\n",
"df['quality'] = pd.cut(df['quality'], bins = bins, labels = group_names)\n",
"df.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* Now lets assign a labels to our quality variable"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"label_quality = LabelEncoder()\n",
"# Bad becomes 0 and good becomes 1\n",
"df['quality'] = label_quality.fit_transform(df['quality'])\n",
"df['quality'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* Now seperate the dataset as response variable and feature variabes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"X = df.drop('quality', axis=1)\n",
"y = df['quality']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* Train and Test splitting of data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=50)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* Applying Standard scaling to get optimized result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sc = StandardScaler()\n",
"X_train = sc.fit_transform(X_train)\n",
"X_test = sc.fit_transform(X_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"***\n",
"## Machine learning algorithms\n",
"***"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import warnings filter\n",
"from warnings import simplefilter\n",
"# ignore all future warnings\n",
"simplefilter(action='ignore', category=FutureWarning)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"_Prepare configuration for cross validation test harness_"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"seed = 7\n",
"# prepare models\n",
"models = []\n",
"models.append(('SupportVectorClassifier', SVC()))\n",
"models.append(('StochasticGradientDecentC', SGDClassifier()))\n",
"models.append(('RandomForestClassifier', RandomForestClassifier()))\n",
"models.append(('DecisionTreeClassifier', DecisionTreeClassifier()))\n",
"models.append(('GaussianNB', GaussianNB()))\n",
"models.append(('KNeighborsClassifier', KNeighborsClassifier()))\n",
"models.append(('AdaBoostClassifier', AdaBoostClassifier()))\n",
"models.append(('LogisticRegression', LogisticRegression()))\n",
"\n",
"# evaluate each model in turn\n",
"results = []\n",
"names = []\n",
"scoring = 'accuracy'\n",
"for name, model in models:\n",
"\tkfold = model_selection.KFold(n_splits=10, random_state=seed)\n",
"\tcv_results = model_selection.cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring)\n",
"\tresults.append(cv_results)\n",
"\tnames.append(name)\n",
"\tmsg = \"%s: %f (%f)\" % (name, cv_results.mean(), cv_results.std())\n",
"\tprint(msg)\n",
"# boxplot algorithm comparison\n",
"fig = plt.figure()\n",
"fig.suptitle('Algorithm Comparison')\n",
"ax = fig.add_subplot(111)\n",
"plt.boxplot(results)\n",
"ax.set_xticklabels(names)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**1) Support Vector Classifier (svc)**\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"svc = SVC()\n",
"svc.fit(X_train, y_train)\n",
"pred_svc = svc.predict(X_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"_Grid Search CV (finding best parameters for svc model)_"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def svc_param_selection(X, y, nfolds):\n",
" param = {\n",
" 'C': [0.1, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4],\n",
" 'kernel': ['linear', 'rbf'],\n",
" 'gamma': [0.1, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4]\n",
" }\n",
" grid_search = GridSearchCV(svc, param_grid=param, scoring='accuracy', cv=nfolds)\n",
" grid_search.fit(X, y)\n",
" return grid_search.best_params_"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"svc_param_selection(X_train, y_train,10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"_Best params had found **{'C': 1.3, 'gamma': 1.3, 'kernel': 'rbf'}**_"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"_Let's run our SVC again with the best parameters._"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"\n",
"svc2 = SVC(C = 1.3, gamma = 1.3, kernel= 'rbf')\n",
"svc2.fit(X_train, y_train)\n",
"pred_svc2 = svc2.predict(X_test)\n",
"print('Confusion matrix')\n",
"print(confusion_matrix(y_test, pred_svc2))\n",
"print('Classification report')\n",
"print(classification_report(y_test, pred_svc2))\n",
"print('Accuracy score',accuracy_score(y_test, pred_svc2))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**2) Stochastic Gradient Decent Classifier**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sgd = SGDClassifier(loss=\"hinge\", penalty=\"l2\", max_iter=10)\n",
"sgd.fit(X_train, y_train)\n",
"pred_sgd = sgd.predict(X_test)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**3) Random Forest Classifier**\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"rfc = RandomForestClassifier(n_estimators=200, max_depth=20,\n",
" random_state=0)\n",
"rfc.fit(X_train, y_train)\n",
"pred_rfc = rfc.predict(X_test)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**4) KNeighborsClassifier**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"h = .02 # step size in the mesh\n",
"n_neighbors = 2\n",
"# Create color maps\n",
"cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])\n",
"cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])\n",
"\n",
"for weights in ['uniform', 'distance']:\n",
" nbrs = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)\n",
" nbrs.fit(X_train,y_train)\n",
" pred_nbrs = nbrs.predict(X_test)\n",
" print('KNeighborsClassifier')\n",
" print(classification_report(y_test, pred_nbrs))\n",
"\n",
" # Plot the decision boundary. For that, we will assign a color to each\n",
" # point in the mesh [x_min, x_max]x[y_min, y_max].\n",
"\n",
" x_min, x_max = X_train[:, 0].min() -1, X_train[:, 0].max() + 1\n",
" y_min, y_max = X_train[:, 1].min() -1, X_train[:, 1].max() + 1\n",
"\n",
" xx, yy = np.meshgrid(np.arange(x_min, x_max, h),np.arange(y_min, y_max, h))\n",
" Z = nbrs.predict(np.c_[xx.ravel(), yy.ravel()])\n",
" # Put the result into a color plot\n",
" Z = Z.reshape(xx.shape)\n",
" plt.figure()\n",
" plt.pcolormesh(xx, yy, Z, cmap=cmap_light)\n",
"\n",
" # Plot also the training points\n",
" plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold,\n",
" edgecolor='k', s=20)\n",
" plt.xlim(xx.min(), xx.max())\n",
" plt.ylim(yy.min(), yy.max())\n",
" plt.title(\"3-Class classification (k = %i, weights = '%s')\"\n",
" % (n_neighbors, weights))\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**5) AdaBoostClassifier**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ada_classifier = AdaBoostClassifier(n_estimators=100)\n",
"ada_classifier.fit(X_train, y_train)\n",
"pred_ada = ada_classifier.predict(X_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**6) Evaluate model**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"def evaluate(model, test_features, test_labels):\n",
" predictions = model.predict(test_features)\n",
" print('Model Performance')\n",
" print('Average Error: {:0.4f} degrees.'.format(mean_absolute_error(test_labels,predictions)))\n",
" print('Accuracy = {:0.2f}%.'.format(accuracy_score(test_labels,predictions)*100))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"evaluate(svc,X_test,y_test)\n",
"evaluate(svc2,X_test,y_test)\n",
"evaluate(sgd,X_test,y_test)\n",
"evaluate(rfc,X_test,y_test)\n",
"evaluate(ada_classifier,X_test,y_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**7) Cross Validation**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"scores = cross_val_score(ada_classifier,X_test,y_test, cv=5)\n",
"scores.mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**8) Feature importance**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"importance=ada_classifier.feature_importances_\n",
"\n",
"std = np.std([tree.feature_importances_ for tree in ada_classifier.estimators_],\n",
" axis=0)\n",
"indices = np.argsort(importance)\n",
"\n",
"# Plot the feature importances of the forest\n",
"plt.figure()\n",
"plt.title(\"Feature importances\")\n",
"plt.barh(range(X.shape[1]), importance[indices],\n",
" color=\"b\", align=\"center\")\n",
"\n",
"plt.yticks(range(X.shape[1]), colum_names)\n",
"plt.ylim([0, X.shape[1]])\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment