Skip to content

Instantly share code, notes, and snippets.

@Aditii7
Created April 5, 2021 15:39
Show Gist options
  • Save Aditii7/8213d01ee813660821f30adb48e237bc to your computer and use it in GitHub Desktop.
Save Aditii7/8213d01ee813660821f30adb48e237bc to your computer and use it in GitHub Desktop.
Assignment PCA
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import pandas as pd \nimport numpy as np\nfrom sklearn.decomposition import PCA\nimport matplotlib.pyplot as plt\nfrom sklearn.preprocessing import scale ",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Load the wine dataset; the path is resolved relative to the working directory.\nwine = pd.read_csv(\"wine.csv\")\n# Only the last expression of a cell is displayed, so show the preview directly.\nwine.head()",
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 2,
"data": {
"text/plain": " Type Alcohol Malic Ash Alcalinity Magnesium Phenols Flavanoids \\\n0 1 14.23 1.71 2.43 15.6 127 2.80 3.06 \n1 1 13.20 1.78 2.14 11.2 100 2.65 2.76 \n2 1 13.16 2.36 2.67 18.6 101 2.80 3.24 \n3 1 14.37 1.95 2.50 16.8 113 3.85 3.49 \n4 1 13.24 2.59 2.87 21.0 118 2.80 2.69 \n\n Nonflavanoids Proanthocyanins Color Hue Dilution Proline \n0 0.28 2.29 5.64 1.04 3.92 1065 \n1 0.26 1.28 4.38 1.05 3.40 1050 \n2 0.30 2.81 5.68 1.03 3.17 1185 \n3 0.24 2.18 7.80 0.86 3.45 1480 \n4 0.39 1.82 4.32 1.04 2.93 735 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Type</th>\n <th>Alcohol</th>\n <th>Malic</th>\n <th>Ash</th>\n <th>Alcalinity</th>\n <th>Magnesium</th>\n <th>Phenols</th>\n <th>Flavanoids</th>\n <th>Nonflavanoids</th>\n <th>Proanthocyanins</th>\n <th>Color</th>\n <th>Hue</th>\n <th>Dilution</th>\n <th>Proline</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>14.23</td>\n <td>1.71</td>\n <td>2.43</td>\n <td>15.6</td>\n <td>127</td>\n <td>2.80</td>\n <td>3.06</td>\n <td>0.28</td>\n <td>2.29</td>\n <td>5.64</td>\n <td>1.04</td>\n <td>3.92</td>\n <td>1065</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>13.20</td>\n <td>1.78</td>\n <td>2.14</td>\n <td>11.2</td>\n <td>100</td>\n <td>2.65</td>\n <td>2.76</td>\n <td>0.26</td>\n <td>1.28</td>\n <td>4.38</td>\n <td>1.05</td>\n <td>3.40</td>\n <td>1050</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1</td>\n <td>13.16</td>\n <td>2.36</td>\n <td>2.67</td>\n <td>18.6</td>\n <td>101</td>\n <td>2.80</td>\n <td>3.24</td>\n <td>0.30</td>\n <td>2.81</td>\n <td>5.68</td>\n <td>1.03</td>\n <td>3.17</td>\n <td>1185</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1</td>\n <td>14.37</td>\n <td>1.95</td>\n <td>2.50</td>\n <td>16.8</td>\n <td>113</td>\n <td>3.85</td>\n <td>3.49</td>\n <td>0.24</td>\n <td>2.18</td>\n <td>7.80</td>\n <td>0.86</td>\n <td>3.45</td>\n <td>1480</td>\n </tr>\n <tr>\n <th>4</th>\n <td>1</td>\n <td>13.24</td>\n <td>2.59</td>\n <td>2.87</td>\n <td>21.0</td>\n <td>118</td>\n <td>2.80</td>\n <td>2.69</td>\n <td>0.39</td>\n <td>1.82</td>\n <td>4.32</td>\n <td>1.04</td>\n <td>2.93</td>\n <td>735</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Keep every column after the first (Type); the remaining 13 columns are the PCA inputs.\n# Bind the slice to a plain variable: assigning it as `wine.data` makes pandas emit a UserWarning.\nwine_features = wine.iloc[:, 1:]\n# NumPy array of the feature values\nWINE = wine_features.values\nWINE",
"execution_count": 3,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 3,
"data": {
"text/plain": "array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,\n 1.065e+03],\n [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,\n 1.050e+03],\n [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,\n 1.185e+03],\n ...,\n [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,\n 8.350e+02],\n [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,\n 8.400e+02],\n [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,\n 5.600e+02]])"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Standardize each feature column (defaults spelled out for the reader).\nwine_normal = scale(WINE, axis=0, with_mean=True, with_std=True)",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "wine_normal",
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": "array([[ 1.51861254, -0.5622498 , 0.23205254, ..., 0.36217728,\n 1.84791957, 1.01300893],\n [ 0.24628963, -0.49941338, -0.82799632, ..., 0.40605066,\n 1.1134493 , 0.96524152],\n [ 0.19687903, 0.02123125, 1.10933436, ..., 0.31830389,\n 0.78858745, 1.39514818],\n ...,\n [ 0.33275817, 1.74474449, -0.38935541, ..., -1.61212515,\n -1.48544548, 0.28057537],\n [ 0.20923168, 0.22769377, 0.01273209, ..., -1.56825176,\n -1.40069891, 0.29649784],\n [ 1.39508604, 1.58316512, 1.36520822, ..., -1.52437837,\n -1.42894777, -0.59516041]])"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Number of principal components to retain.\nN_COMPONENTS = 6\n\npca = PCA(n_components=N_COMPONENTS)\n# Scores of every sample on the retained components.\npca_values = pca.fit_transform(wine_normal)",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "var = pca.explained_variance_ratio_\nvar",
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 7,
"data": {
"text/plain": "array([0.36198848, 0.1920749 , 0.11123631, 0.0706903 , 0.06563294,\n 0.04935823])"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Cumulative explained variance in percent. Accumulate at full precision and round once at the end,\n# so per-component rounding errors do not add up in the running total.\nvar1 = np.round(np.cumsum(var) * 100, decimals=2)\nvar1",
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 8,
"data": {
"text/plain": "array([36.2 , 55.41, 66.53, 73.6 , 80.16, 85.1 ])"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "pca.components_",
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 9,
"data": {
"text/plain": "array([[ 0.1443294 , -0.24518758, -0.00205106, -0.23932041, 0.14199204,\n 0.39466085, 0.4229343 , -0.2985331 , 0.31342949, -0.0886167 ,\n 0.29671456, 0.37616741, 0.28675223],\n [-0.48365155, -0.22493093, -0.31606881, 0.0105905 , -0.299634 ,\n -0.06503951, 0.00335981, -0.02877949, -0.03930172, -0.52999567,\n 0.27923515, 0.16449619, -0.36490283],\n [-0.20738262, 0.08901289, 0.6262239 , 0.61208035, 0.13075693,\n 0.14617896, 0.1506819 , 0.17036816, 0.14945431, -0.13730621,\n 0.08522192, 0.16600459, -0.12674592],\n [-0.0178563 , 0.53689028, -0.21417556, 0.06085941, -0.35179658,\n 0.19806835, 0.15229479, -0.20330102, 0.39905653, 0.06592568,\n -0.42777141, 0.18412074, -0.23207086],\n [-0.26566365, 0.03521363, -0.14302547, 0.06610294, 0.72704851,\n -0.14931841, -0.10902584, -0.50070298, 0.13685982, -0.07643678,\n -0.17361452, -0.10116099, -0.1578688 ],\n [-0.21353865, -0.53681385, -0.15447466, 0.10082451, -0.03814394,\n 0.0841223 , 0.01892002, 0.25859401, 0.53379539, 0.41864414,\n -0.10598274, -0.26585107, -0.11972557]])"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Cumulative explained variance against the number of components kept.\nfig, ax = plt.subplots()\n# Plot against 1-based component counts rather than the implicit 0-based index.\nax.plot(np.arange(1, len(var1) + 1), var1, color=\"red\", marker=\"o\")\nax.set(\n    xlabel=\"Number of principal components\",\n    ylabel=\"Cumulative explained variance (%)\",\n    title=\"Cumulative explained variance\",\n);",
"execution_count": 10,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 432x288 with 1 Axes>",
"image/png": "\n"
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Preview the first principal component's scores (first five rows only, instead of dumping every row).\npca_values[:5, 0:1]",
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 11,
"data": {
"text/plain": "array([[3.31675081],\n       [2.20946492],\n       [2.51674015],\n       [3.75706561],\n       [1.00890849]])"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Scatter of the samples in the plane of the first two principal components.\n# Use 1-D columns; scatter does not need (n, 1) slices.\nx = pca_values[:, 0]\ny = pca_values[:, 1]\n\nfig, ax = plt.subplots()\nax.scatter(x, y)\nax.set(\n    xlabel=\"PC1\",\n    ylabel=\"PC2\",\n    title=\"Wine samples on the first two principal components\",\n);",
"execution_count": 12,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 432x288 with 1 Axes>",
"image/png": "\n"
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.8.5",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "Assignment PCA",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment