Skip to content

Instantly share code, notes, and snippets.

@rakuishi
Last active February 3, 2022 02:14
Show Gist options
  • Save rakuishi/5e56cb5044495fb79f7630b0d4f7bf4d to your computer and use it in GitHub Desktop.
Save rakuishi/5e56cb5044495fb79f7630b0d4f7bf4d to your computer and use it in GitHub Desktop.
【キカガク流】人工知能・機械学習 脱ブラックボックス講座 - 中級編 -
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x1</th>\n",
" <th>x2</th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x6</th>\n",
" <th>x7</th>\n",
" <th>x8</th>\n",
" <th>x9</th>\n",
" <th>x10</th>\n",
" <th>x11</th>\n",
" <th>x12</th>\n",
" <th>x13</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.00632</td>\n",
" <td>18.0</td>\n",
" <td>2.31</td>\n",
" <td>0</td>\n",
" <td>0.538</td>\n",
" <td>6.575</td>\n",
" <td>65.2</td>\n",
" <td>4.0900</td>\n",
" <td>1</td>\n",
" <td>296</td>\n",
" <td>15.3</td>\n",
" <td>396.90</td>\n",
" <td>4.98</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.02731</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>6.421</td>\n",
" <td>78.9</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>396.90</td>\n",
" <td>9.14</td>\n",
" <td>21.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.02729</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>7.185</td>\n",
" <td>61.1</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>392.83</td>\n",
" <td>4.03</td>\n",
" <td>34.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.03237</td>\n",
" <td>0.0</td>\n",
" <td>2.18</td>\n",
" <td>0</td>\n",
" <td>0.458</td>\n",
" <td>6.998</td>\n",
" <td>45.8</td>\n",
" <td>6.0622</td>\n",
" <td>3</td>\n",
" <td>222</td>\n",
" <td>18.7</td>\n",
" <td>394.63</td>\n",
" <td>2.94</td>\n",
" <td>33.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.06905</td>\n",
" <td>0.0</td>\n",
" <td>2.18</td>\n",
" <td>0</td>\n",
" <td>0.458</td>\n",
" <td>7.147</td>\n",
" <td>54.2</td>\n",
" <td>6.0622</td>\n",
" <td>3</td>\n",
" <td>222</td>\n",
" <td>18.7</td>\n",
" <td>396.90</td>\n",
" <td>5.33</td>\n",
" <td>36.2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 \\\n",
"0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 \n",
"1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 \n",
"2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 \n",
"3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 \n",
"4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 \n",
"\n",
" x13 y \n",
"0 4.98 24.0 \n",
"1 9.14 21.6 \n",
"2 4.03 34.7 \n",
"3 2.94 33.4 \n",
"4 5.33 36.2 "
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# CSV ファイルの読み込み\n",
"df = pd.read_csv('housing.csv')\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"506"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# レコード数の確認\n",
"len(df)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x1</th>\n",
" <th>x2</th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x6</th>\n",
" <th>x7</th>\n",
" <th>x8</th>\n",
" <th>x9</th>\n",
" <th>x10</th>\n",
" <th>x11</th>\n",
" <th>x12</th>\n",
" <th>x13</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" <td>506.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>3.613524</td>\n",
" <td>11.363636</td>\n",
" <td>11.136779</td>\n",
" <td>0.069170</td>\n",
" <td>0.554695</td>\n",
" <td>6.284634</td>\n",
" <td>68.574901</td>\n",
" <td>3.795043</td>\n",
" <td>9.549407</td>\n",
" <td>408.237154</td>\n",
" <td>18.455534</td>\n",
" <td>356.674032</td>\n",
" <td>12.653063</td>\n",
" <td>22.532806</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>8.601545</td>\n",
" <td>23.322453</td>\n",
" <td>6.860353</td>\n",
" <td>0.253994</td>\n",
" <td>0.115878</td>\n",
" <td>0.702617</td>\n",
" <td>28.148861</td>\n",
" <td>2.105710</td>\n",
" <td>8.707259</td>\n",
" <td>168.537116</td>\n",
" <td>2.164946</td>\n",
" <td>91.294864</td>\n",
" <td>7.141062</td>\n",
" <td>9.197104</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.006320</td>\n",
" <td>0.000000</td>\n",
" <td>0.460000</td>\n",
" <td>0.000000</td>\n",
" <td>0.385000</td>\n",
" <td>3.561000</td>\n",
" <td>2.900000</td>\n",
" <td>1.129600</td>\n",
" <td>1.000000</td>\n",
" <td>187.000000</td>\n",
" <td>12.600000</td>\n",
" <td>0.320000</td>\n",
" <td>1.730000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.082045</td>\n",
" <td>0.000000</td>\n",
" <td>5.190000</td>\n",
" <td>0.000000</td>\n",
" <td>0.449000</td>\n",
" <td>5.885500</td>\n",
" <td>45.025000</td>\n",
" <td>2.100175</td>\n",
" <td>4.000000</td>\n",
" <td>279.000000</td>\n",
" <td>17.400000</td>\n",
" <td>375.377500</td>\n",
" <td>6.950000</td>\n",
" <td>17.025000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.256510</td>\n",
" <td>0.000000</td>\n",
" <td>9.690000</td>\n",
" <td>0.000000</td>\n",
" <td>0.538000</td>\n",
" <td>6.208500</td>\n",
" <td>77.500000</td>\n",
" <td>3.207450</td>\n",
" <td>5.000000</td>\n",
" <td>330.000000</td>\n",
" <td>19.050000</td>\n",
" <td>391.440000</td>\n",
" <td>11.360000</td>\n",
" <td>21.200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>3.677082</td>\n",
" <td>12.500000</td>\n",
" <td>18.100000</td>\n",
" <td>0.000000</td>\n",
" <td>0.624000</td>\n",
" <td>6.623500</td>\n",
" <td>94.075000</td>\n",
" <td>5.188425</td>\n",
" <td>24.000000</td>\n",
" <td>666.000000</td>\n",
" <td>20.200000</td>\n",
" <td>396.225000</td>\n",
" <td>16.955000</td>\n",
" <td>25.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>88.976200</td>\n",
" <td>100.000000</td>\n",
" <td>27.740000</td>\n",
" <td>1.000000</td>\n",
" <td>0.871000</td>\n",
" <td>8.780000</td>\n",
" <td>100.000000</td>\n",
" <td>12.126500</td>\n",
" <td>24.000000</td>\n",
" <td>711.000000</td>\n",
" <td>22.000000</td>\n",
" <td>396.900000</td>\n",
" <td>37.970000</td>\n",
" <td>50.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" x1 x2 x3 x4 x5 x6 \\\n",
"count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 \n",
"mean 3.613524 11.363636 11.136779 0.069170 0.554695 6.284634 \n",
"std 8.601545 23.322453 6.860353 0.253994 0.115878 0.702617 \n",
"min 0.006320 0.000000 0.460000 0.000000 0.385000 3.561000 \n",
"25% 0.082045 0.000000 5.190000 0.000000 0.449000 5.885500 \n",
"50% 0.256510 0.000000 9.690000 0.000000 0.538000 6.208500 \n",
"75% 3.677082 12.500000 18.100000 0.000000 0.624000 6.623500 \n",
"max 88.976200 100.000000 27.740000 1.000000 0.871000 8.780000 \n",
"\n",
" x7 x8 x9 x10 x11 x12 \\\n",
"count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 \n",
"mean 68.574901 3.795043 9.549407 408.237154 18.455534 356.674032 \n",
"std 28.148861 2.105710 8.707259 168.537116 2.164946 91.294864 \n",
"min 2.900000 1.129600 1.000000 187.000000 12.600000 0.320000 \n",
"25% 45.025000 2.100175 4.000000 279.000000 17.400000 375.377500 \n",
"50% 77.500000 3.207450 5.000000 330.000000 19.050000 391.440000 \n",
"75% 94.075000 5.188425 24.000000 666.000000 20.200000 396.225000 \n",
"max 100.000000 12.126500 24.000000 711.000000 22.000000 396.900000 \n",
"\n",
" x13 y \n",
"count 506.000000 506.000000 \n",
"mean 12.653063 22.532806 \n",
"std 7.141062 9.197104 \n",
"min 1.730000 5.000000 \n",
"25% 6.950000 17.025000 \n",
"50% 11.360000 21.200000 \n",
"75% 16.955000 25.000000 \n",
"max 37.970000 50.000000 "
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 統計量の算出\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 分布の確認\n",
"%matplotlib inline\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x1a1cb07b70>"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEKCAYAAAACS67iAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAHdZJREFUeJzt3Wl0XHeZ5/HvU9olS7JlyY6XOHYW\nm6x0QBNCmGFLBzI0hzAcuic06yQzZu/uoad76OEM9OkF+vTMdNjSgBtCoCEJDDAkTSeB7PvmBK/x\nvsiWLVuSte+lus+8uFUlWbEtRaoq1b3+fc7xUVXpSve5Jevx4+e/XHN3REQk+hLzHYCIiOSGErqI\nSEwooYuIxIQSuohITCihi4jEhBK6iEhMKKGLiMSEErqISEwooYuIxERpIU/W2Njoq1evLuQpRUQi\n78UXX+x096bpjitoQl+9ejUbN24s5ClFRCLPzFpmcpxaLiIiMaGELiISE0roIiIxoYQuIhITSugi\nIjGhhC4iEhNK6CIiMaGELiKSR6nAGR5LkQryf7tPJXQRkTzadLiHi794P0/s6cj7uZTQRUTyKPCw\nMi9JWN7PpYQuIpJHmVZLwpTQRUQiLVOhK6GLiERcEIQf1XIREYm4VLaHnv9zTXsKM7vNzNrNbNsp\nPvffzMzNrDE/4YmIRFtQZD3024Hrp75oZucC1wGHchyTiEhsZAZFi6Ll4u6PA12n+NQtwJ8D+Z8t\nLyISUaliHxQ1s/cAR9x9c47jERGJlaCAFfqrvgWdmVUDXwDeMcPj1wPrAVatWvVqTyciEmmpIl9Y\ndAGwBthsZgeBlcBLZnbOqQ529w3u3uzuzU1N097jVEQkVgq5sOhVV+juvhVYknmeTurN7t6Zw7hE\nRGKhqJb+m9mdwDPAOjNrNbOb8x6ViEhMpDILi4qhQnf3D0zz+dU5i0ZEJGay89CLYWGRiIjMXrEP\nioqIyAxlFxYV6zx0ERGZmexui6rQRUSiTRW6iEhMZOehq0IXEYm2opqHLiIis1fIeehK6CIieTQx\nKJr/cymhi4jkkQZFRURioqhucCEiIrMXuGMGpgpdRCTaUoEXpN0CSugiInmVci/IXuighC4ikldB\n4AWZ4QJK6CIieRV4YWa4gBK6iEhepQIvyLJ/UEIXEcmrwL0gUxZBCV1EJK80y0VEJCYCL6KWi5nd\nZmbtZrZt0mv/y8x2mtkWM/t/ZrYwv2GKiERTsVXotwPXT3ntAeAyd78C2A38RY7jEhGJhVRQmGX/\nMIOE7u6PA11TXvuNu4+nnz4LrMxDbCIikRe2XApzrlyc5ibgvhx8HxGR2Cm2lstpmdkXgHHgx2c4\nZr2ZbTSzjR0dHXM5nYhI5KSKaVD0dMzso8C7gQ+6p3dwPwV33+Duze7e3NTUNNvTiYhEUlDACr10\nNl9kZtcD/x14i7sP5TYkEZH4SAVFtLDIzO4EngHWmVmrmd0MfBOoBR4ws01m9u08xykiEklBAXdb\nnLZCd/cPnOLl7+UhFhGR2CmqCl1ERGYv5RT/oKiIiEwvHBQtzLmU0EVE8kgtFxGRmNAt6EREYiJQ\nhS4iEg8p3eBCRCQegkAtFxGRWFCFLiISE6kACpTPldBFRPJJLRcRkZhQy0VEJCaCIAL7oYuIyPQC\nj8gdi0RE5MzUchERiYkgQIOiIiJxEG7OVZhzKaGLiOSRWi4iIjGheegiIjGhCl1EJCZSxVShm9lt\nZtZuZtsmvdZgZg+Y2Z70x0X5DVNEJJqKbT/024Hrp7z2eeAhd78IeCj9XEREpiiqlou7Pw50TXn5\nBuAH6cc/AN6b47hERGIhCvPQl7p7G0D645LTHWhm681so5lt7OjomOXpRESiKazQC3OuvJ/G3Te4\ne7O7Nzc1NeX7dCIiRSUVFP9eLsfNbBlA+mN7
7kISEYmHIHCAot9t8R7go+nHHwXuzk04IiLxkfIw\noRdNhW5mdwLPAOvMrNXMbgb+DrjOzPYA16Wfi4jIJKkCV+il0x3g7h84zaeuzXEsIiKxEmQq9CJv\nuYiIyDQyFXrRtFxERGR2giD8WOyDoiIiMo2JQdHCnE8JXUQkT7Itl2IZFC0Wdzx3aN7O/YdvWDVv\n5xaR6MoMipp66CIi0VboCl0JXUQkTzTLRUQkJjItF81yERGJuImWS2HOp4QuIpIn6Xxe9Puhi4jI\nNLT0X0QkJjQoKiISE4XebVEJXUQkT4Ji2w9dRERmRwuLRERiQvPQRURiIpXePlctFxGRiJsYFC3M\n+ZTQRUTyJFKDomb2X81su5ltM7M7zawyV4GJiERdZAZFzWwF8EdAs7tfBpQAN+YqMBGRqEtFbFC0\nFKgys1KgGjg695BEROIhiMpKUXc/Avxv4BDQBvS6+29yFZiISNRFqeWyCLgBWAMsB2rM7EOnOG69\nmW00s40dHR2zj1REJGKy89CLvUIHfhc44O4d7p4EfgFcM/Ugd9/g7s3u3tzU1DSH04mIREt2Hnqx\nV+iErZarzazawjugXgvsyE1YIiLRl/KI3ODC3Z8Dfga8BGxNf68NOYpLRCTyMoOihWq5lM7li939\nS8CXchSLiEisRGZQVEREziwVoUFRERE5g0A3uBARiYdUlPZyERGR0wu026KISDzoJtEiIjGRCvO5\nZrmIiESdBkVFRGJCg6IiIjGRvWORKnQRkWgr9NJ/JXQRkTyJ0m6LIiJyBhNL/wtzPiV0EZE8CQIn\nYWBquYiIRFvKvWDtFlBCFxHJm7BCV0IXEYm8VKAKXUQkFlLuBVtUBEroIiJ5EwResGX/oIQuIpI3\nkRoUNbOFZvYzM9tpZjvM7I25CkxEJOpSQeFWicIcbxINfA24393fb2blQHUOYhIRiYUgcEoK2AeZ\ndUI3szrgzcDHANx9DBjLTVgiItEXpUHR84EO4Ptm9lsz+66Z1eQoLhGRyIvSoGgp8DrgW+5+JTAI\nfH7qQWa23sw2mtnGjo6OOZxORCRaojQo2gq0uvtz6ec/I0zwJ3H3De7e7O7NTU1NczidiEi0pIKI\ntFzc/Rhw2MzWpV+6Fng5J1GJiMRA4E4B8/mcZ7l8FvhxeobLfuA/zT0kEZF4KPTS/zkldHffBDTn\nKBYRkVgp9Dx0rRQVEcmTIEKDoiIicgbabVFEJCYC137oIiKxoApdRCQmIjMPXUREzixwJ1HALKuE\nLiKSJ4GjlouISBykdJNoEZF40Dx0EZGY0KCoiEhMpCK0H7qIiJxBEKE7FomIyBloYZGISEwEjlou\nIiJxEA6KFu58SugiInmiQVERkZjQoKiISExoUFREJCbCzbmU0EVEIi9yK0XNrMTMfmtmv8pFQCIi\ncRHFlssfAzty8H1ERGIlcChggT63hG5mK4HfA76bm3BEROIjai2XrwJ/DgSnO8DM1pvZRjPb2NHR\nMcfTiYhERyoq2+ea2buBdnd/8UzHufsGd2929+ampqbZnk5EJHKCCC0sehPwHjM7CNwFvN3MfpST\nqEREYiAVlYVF7v4X7r7S3VcDNwIPu/uHchaZiEiEuTuuzblERKIvFThAQSv00lx8E3d/FHg0F99L\nRCQOUp5O6AUsm1Whi4jkQZCe+6eWi4hIxGUr9CgMioqIyOlle+iq0EVEos3TFXpCFbqISLSpQhcR\niYlMD12DoiIiEZeZ5aJBURGRiNM8dBGRmAgCDYqKiMSCBkVFRGJiouWihC4iEmlquYiIxIQq9Gm4\nO1uP9GZ7UyIixeaJPR10DY5l85Qq9NNo6x3hzucPsetY33yHIiLyCqPjKT72/Rf40bMtE/PQVaGf\n2uDYePhxNDXPkYiIvFLvcJJU4GGFrnnoZzY8Fiby4aQSuogUn77hJDCR2AFMLZdTyyRyJXQRKUa9\n6YTeN5wk
0H7oZzaSDJtSSugiUoz6hsO28OQKPRI9dDM718weMbMdZrbdzP44l4GdSrblMqaELiLF\np3dSy2U+5qHP5SbR48CfuvtLZlYLvGhmD7j7yzmK7RUylfmIKnQRKUKTE3qk5qG7e5u7v5R+3A/s\nAFbkKrBTUQ9dRIpZ7ykGRSM3y8XMVgNXAs+d4nPrzWyjmW3s6OiY03lG1HIRkSKWSeij40G2kxCp\nhUVmtgD4OfAn7v6KFT/uvsHdm929uampaU7nUoUuIsUsM20RoHsofByJlguAmZURJvMfu/svchPS\n6U3uoWduwCoiUix6JyX0rsExICIVuoWz5b8H7HD3f8hdSKeXabUEDmPjQSFOKSIyY5MTenc6oUel\nQn8T8GHg7Wa2Kf3nXTmK6xUCd0aSKWorw4k5aruISLHpHU6ypLYCgK6hwif0WU9bdPcngYJFOjYe\n4MCi6nL6R8YZTqZYWKiTi4jMQN9wknMbqmnvH6Un3UOPRMul0DLtloaa8pOei4gUi97hJOcuqgIm\neuhRabkUVKbFsqg6TOhaXCQixWQ8FTA4luLchmoAujMtF1Xor5RJ6NkKXQldRIpI30i4j8vimnKq\ny0uyg6KJqC0sKgS1XESkmGVmuNRXl1FfVZZN8Gq5nEKmxbKwqgxj0iKjsRQvtnRpXrqIFNyz+0+w\n7UgvMCmhV4UJPUMtl1PIJPCq8hIqy0qyz1861M3PXzrCiYGx+QxPRM5C/+MXW/nKfTuAiYReV1lG\nXeVEQk9EYdpioQ0nUxhQXpqgqrwkuzf6icHR7MfG9PxPEZF8SwXO4e4hRtOLHPsmVeh1qtDPbHgs\nRWVZCQkzqspKsj30zNSgE4Oq0EWkcI72DJNMOW29w4yNB6dtuRSyQo9OQk+mqCovAQgTerrlkmm1\ndCmhi0gBHeoaAsKtSFq7hyZaLlN76ErorzSSTFFVFib0yvKwQg/cs6uxlNBFpJBaTgxNPO4aom84\nSUVpgsqyEg2KTmd47JUV+uS7giihi0ghZSp0gEMnwgo9k8jrqyaGJzUP/RSGJ1XoVWUJRpKpbBJf\nXl9J1+BY9i7bvcNJWruHTvu9RERerWO9I2w+3JN9fqhrkDWNNVSVldByYoi+kWR2MFSDotMYTgZU\nlk1U6OOBc7xvBIALl9QyHjj96Yn8921r43tPHsgmeBGRufryvTv40Hefy95aruXEEOctrmZVQzWH\nuganVOjz00OPxLRFd2dk7OQeOsCR7mFKzFjTWMPjezroGhyjvqqMlhPhVKJjvSMsX1g1n6GLSBG5\n47lDs/7ax3Z30D86zi0P7GZZfSV72weorSwjkTC2tPZSWmLUVpRxx3OHaDkxmP26O58/DMAfvmHV\nnOOfTiQq9OFkipT7ST10gCM9wyysLqNxQbgdQNfgKD1DY9nR5sk9LhGR2ZqcV1q6hhgaSzE6HrC4\nppzFNeV0DY6dNM6X6SYUsDgPz1fY081O5o3M9tDTb1pH/yiLF5SzsLqchIVz0VvSSTxhnPSv5N2b\njvDwzuMFjlxEouhA5yDfenRvdsuRyXnl0InB7PhdQ005DTXljAfhjLupOcoK2D+HqCX0KRW6E76h\nJQljYXX4r2TLiSHKSxK85py6bIXeP5Lk+QNdPLm3k/GUbl0nImf2zP4THO4eZvvRcJ+WyXmlpWso\nu5Axk9AhzEeTx/lAFfop9Q2Hg51VU94sgIaaivTHMKEfOjHIyoYq1jTW0D2UpG84ybYjvTgwkgzY\n0z4AQDIVsOHxfWxp7UFEzl49Q2N84+E92Zlxo8kUu471AbClNUzok/NKz1CSg53h//4b0i2XjEzR\nWVaSoDRhqtBPJVOhV5aF4U5O6Jk3s6G6nI7+Udp6RzivoYZV6U3mW7qG2NLay5LaCqrLS9icTuAb\nW7o5eGKI+7cfy45aD42Oc9+2NvpGJm70KiLxEbjz8M7jHJ40vvbo7g7aek
d44OWwJbvjWD/JlLOm\nsYZ9HQN0DY6l80p1Nq9sPdJLXWUpZSUJ6qvLsvfinJybqspKolWhm9n1ZrbLzPaa2edzFdRUU3vo\nmVkuMLE/ekNNOaPp+46et7ia5QurKCsxtrT20NI1xO+cu5DLVtSzo62P4bEUj+/uoKailJ6hZHZu\n6a+2tvHEnk5++dsj2e14d7T18dHbnqejf3QinqHkSX8hRGT+DYyOZytnCGfH/eU92/nKfTuyv88v\nHuzmwR3t3PH8IUbTixNfbOmmpqKUPe0DtHYPsaW1h/qqMt59xTICD6dBh3mlJptXhpOpbO4pTSRY\nWB1OU5yc0CvLS7DC3XYZmENCN7MS4Fbg3wOXAB8ws0tyFdhkU3voCTMqSsPQJyd0CO9avaqhmpKE\nsXJRNduPhv91unxFPVesrCeZcn6y8RC9w0ne/7oVLKuv5NHd7ew81semwz2cU1fJzmP9bD3Sy9Ge\nYe564RCP7e7gEz96kdHxFIe7hnj3N5/gulse48k9nUC4SvXj/7yRL969jdHxcBBlcHSc7zy2L7tX\nMoQ3un7pUDdj4xN9/CBwhsbG8/G2iRS90fFU9ncmY+exPjoHJgqo7sExbn1kL/s7BrKv/fzFVt73\nj0/xYks3AO39I/yHW5/iulse4/5txwD4+kN7uf3pg3znsf1894kDHO8b4b7tbSypraBvOMmvXz7G\nk3s6cHduetNqKssS3L/9GHuOD3D5inrOqatkSW0F24/2YcC5iybyCky0e8PHYf6pKp/fCn0u89Cv\nAva6+34AM7sLuAF4OReBTTbRcjn5zaooTVBWEib2xempi0vrKrPHrWqo5kDnICsXVbF4QQWLasqp\nqyxl9/EBli+sZO3SWsZSzp3PH+LO5w/RVFvBJ95yAf/0xH7+ZfNRSksSVJeX8qfvWMsX797On9y1\nic2HexgYHWflompu+sELfP761/C9Jw/Q3j9CMuVsae3l428+n6/ct5NDXUOUJIxPv/UCLltRz1fu\n28mBzkHOb6zhC793MYNjKb7x0B72dQzw3itX8PE3X8CW1h5uf/ogg6PjfPAN5/GuK5bxyM527t50\nhPqqMv6g+VwuX1nPr7cd45FdHaxduoAbfmcF9VVl/Hr7MTa39tJ83iLecelS+obHeWRXO0d7hrnm\ngsW88YJGDnYOpgeHnWsuXMwly+rYfrSPFw52UV9VxtXnN7C0rpJNh3vYfrSPVQ3VNJ+3iNKSBJsO\nd3Owc4h159Ryxcp6+kfG2dLaQ/dQkkuX17HunFqO9oxkxywuW17HykXV7O8cYGdbP3VVpVy6vJ76\nqjJ2HevnQOcgy+oruXh5HQC7j/XT1jvCmsYaLlyygIHRcXYd66d/ZJyLli7gvIZqjvWNsOf4AGaw\ndmktS2oraOkaYl/7APVVZaxdWktVeQn7OwY53D3EsvpKLmhaQODO3vYBOvpHWd1Yw+rFNfSNJNlz\nfIDh5DgXNtWyYlEVx/pG2Ns+QGnCuHDJAhpqymk5McSBzkEWVpexdkktFWUJ9qarueULq7hwyQKS\nKWfP8X46B0ZZ07ggPYYzxq5j/YwkU6xdWsvKRVW0dg+z63g/5aUJLj6njoaacva2D7CnvZ/GBRVc\nvKyO8tIEO9r6sgtXLl5Wx/BYim1He+noH+U159Sydmktx/tG2HS4h7HxgMtX1rN6cQ172wfYdLiH\nBRWlXLlqIY0LKth8uIctR3pZuaiK5vMaKEkYzx/oYvfxfi5ZXsdVqxvoGhrjqb2dtPWOcNXqBl6/\nehG7j/Xz6K4OkqmAt6xt4tLl9Ty9r5OHd7azqKacd166lOULq/jXLW08vqeTi5fVcsNrVxC489ON\nh3nhYDdvW9fE+1+/kt3HB/j+Uwc41DXE779+Je+9cgX3bm3ju08eIAicm/7tGq59zVK+9tBuHtzR\nTk15CZ9++4WsWVzD/7x7O50Do3z9oT
382TvX0do9zO1PH6S8JMGNG57hz965jrueP0xb7wgXLanl\n03e8xAffsIofPtPC+65cwch4iq/ct4NfbjrCeMr58NXn8cz+Ezy97wSlCeO1KxeyrL6Kay5o5OGd\n7QBcsbIeM+OKlfU8uKOdJXUV2WSdySsNk3rnDTUV7OsYzLaFMzkqUeAe+lwS+grg8KTnrcAb5hbO\nqfUNJ6ksS5z05lSXl1BeOvHmZd7cTI8LwtYLwBUr6oGwsr98RT1P7TvBW9cuwcy4dHkdTbUVdPaP\n8r4rV1BemuB9r1vBrY/spSQV8PE3X8BH3riaEwNjfO2hPSyqLuPO9VezvL6KD9/2HH/1q5dZVl/J\nzz5xDW29w3zup5v55I9fYlVDNbd9rJlfbWnj6w/vBeD8phq++O5L+NGzLdz8g40AXLRkATdetYpf\nvNTKL146AsC6pbUsqa3kb+/dwd/euyP72sETQzy4oz17fasXV/PU3k7+6YkD2dcaF5TzL5uP8qV7\ntmdfqyxL8MNnWk56T83glgdn+xPJPzOYutB3Lq9F6fvPp5KE8a1H9530vMSM7zy+P/tafVUZQ2Pj\nbJj02prGGp7a28l3HgtfKy9NcNnyOr792D7+Mf39ViysYu3SWr7xyN7s78S1r1lCImF89cE9fPXB\nPdSUl/C569ay9Ugvf3//LgAuXlbHLf/xtdz+1EH+5l/D34eb3rSGT73tAj730818+d6dLKgo5Yc3\nX8XFy+q46fsv8MNnWrhy1UK+/L7LCdxpOTHE9qN9XH/pOSxeUMF1lyzl5bY+eoeSvGVdEwDXXLCY\nJ/d2sqCilBXpBYlXrFzIgzvaWdVQk73WTF6ZPBiaeVxdPpFSq8tLCrp1LoDN9tZtZvb7wDvd/T+n\nn38YuMrdPzvluPXA+vTTdcCuWcbaCHTO8mujStd8dtA1nx3mcs3nuXvTdAfNpUJvBc6d9HwlcHTq\nQe6+Adgwh/MAYGYb3b15rt8nSnTNZwdd89mhENc8l1kuLwAXmdkaMysHbgTuyU1YIiLyas26Qnf3\ncTP7DPBroAS4zd23T/NlIiKSJ3PabdHd7wXuzVEs05lz2yaCdM1nB13z2SHv1zzrQVERESkukVj6\nLyIi0yu6hD7ddgJmVmFmP0l//jkzW134KHNrBtf8OTN72cy2mNlDZnbefMSZSzPdNsLM3m9mbmaR\nnhExk+s1sz9I/5y3m9kdhY4x12bw93qVmT1iZr9N/91+13zEmUtmdpuZtZvZttN83szs6+n3ZIuZ\nvS6nAbh70fwhHFzdB5wPlAObgUumHPMp4NvpxzcCP5nvuAtwzW8DqtOPP3k2XHP6uFrgceBZoHm+\n487zz/gi4LfAovTzJfMddwGueQPwyfTjS4CD8x13Dq77zcDrgG2n+fy7gPsIdym5Gngul+cvtgo9\nu52Au48Bme0EJrsB+EH68c+Aa63Qe1Tm1rTX7O6PuHtmN7BnCef8R9lMfs4Afw38PTBSyODyYCbX\n+1+AW929G8Dd24m2mVyzA3Xpx/WcYh1L1Lj740DXGQ65Afihh54FFprZslydv9gS+qm2E1hxumPc\nfRzoBRYXJLr8mMk1T3Yz4b/wUTbtNZvZlcC57v6rQgaWJzP5Ga8F1prZU2b2rJldX7Do8mMm1/yX\nwIfMrJVwttxnib9X+/v+qhTbTaJPVWlPnYYzk2OiZMbXY2YfApqBt+Q1ovw74zWbWQK4BfhYoQLK\ns5n8jEsJ2y5vJfwf2BNmdpm7R/UOLDO55g8At7v7/zGzNwL/nL7mON9WLK/5q9gq9JlsJ5A9xsxK\nCf+rdqb/4hS7GW2hYGa/C3wBeI+7j079fMRMd821wGXAo2Z2kLDXeE+EB0Zn+vf6bndPuvsBwj2P\nLipQfPkwk2u+GfgpgLs/A1QS7ncSZzP6fZ+tYkvoM9lO4B7go+nH7wce9vRoQ0RNe83p9sN3CJN5\n1H
urMM01u3uvuze6+2p3X004bvAed984P+HO2Uz+Xv+ScPAbM2skbMHsJ7pmcs2HgGsBzOxiwoTe\nUdAoC+8e4CPp2S5XA73u3paz7z7fo8KnGQXeTThC/oX0a39F+AsN4Q/9/wJ7geeB8+c75gJc84PA\ncWBT+s898x1zvq95yrGPEuFZLjP8GRvwD4T3E9gK3DjfMRfgmi8BniKcAbMJeMd8x5yDa74TaAOS\nhNX4zcAngE9M+jnfmn5Ptub677VWioqIxESxtVxERGSWlNBFRGJCCV1EJCaU0EVEYkIJXUQkJpTQ\nRSYxszozO2Jm35zvWEReLSV0kZP9NfDYfAchMhtK6HLWMbN/k96LutLMatL7j19mZq8HlgK/me8Y\nRWaj2DbnEsk7d3/BzO4B/gaoAn5EuELzYeDDpJeji0SNErqcrf6KcL+REeCPCG+ccq+7H4729vpy\nNlNCl7NVA7AAKCPcH+iNwL8zs0+lXy83swF3P+3t8USKjfZykbNSuuVyF7AGWObun5n0uY8Rbpr0\nmdN8uUhRUoUuZx0z+wgw7u53mFkJ8LSZvd3dH57v2ETmQhW6iEhMaNqiiEhMKKGLiMSEErqISEwo\noYuIxIQSuohITCihi4jEhBK6iEhMKKGLiMTE/wc4NR/oK6ngFgAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x1a1e243d30>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.distplot(df['x4'], bins=10)"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x1</th>\n",
" <th>x2</th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x6</th>\n",
" <th>x7</th>\n",
" <th>x8</th>\n",
" <th>x9</th>\n",
" <th>x10</th>\n",
" <th>x11</th>\n",
" <th>x12</th>\n",
" <th>x13</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>x1</th>\n",
" <td>1.000000</td>\n",
" <td>-0.200469</td>\n",
" <td>0.406583</td>\n",
" <td>-0.055892</td>\n",
" <td>0.420972</td>\n",
" <td>-0.219247</td>\n",
" <td>0.352734</td>\n",
" <td>-0.379670</td>\n",
" <td>0.625505</td>\n",
" <td>0.582764</td>\n",
" <td>0.289946</td>\n",
" <td>-0.385064</td>\n",
" <td>0.455621</td>\n",
" <td>-0.388305</td>\n",
" </tr>\n",
" <tr>\n",
" <th>x2</th>\n",
" <td>-0.200469</td>\n",
" <td>1.000000</td>\n",
" <td>-0.533828</td>\n",
" <td>-0.042697</td>\n",
" <td>-0.516604</td>\n",
" <td>0.311991</td>\n",
" <td>-0.569537</td>\n",
" <td>0.664408</td>\n",
" <td>-0.311948</td>\n",
" <td>-0.314563</td>\n",
" <td>-0.391679</td>\n",
" <td>0.175520</td>\n",
" <td>-0.412995</td>\n",
" <td>0.360445</td>\n",
" </tr>\n",
" <tr>\n",
" <th>x3</th>\n",
" <td>0.406583</td>\n",
" <td>-0.533828</td>\n",
" <td>1.000000</td>\n",
" <td>0.062938</td>\n",
" <td>0.763651</td>\n",
" <td>-0.391676</td>\n",
" <td>0.644779</td>\n",
" <td>-0.708027</td>\n",
" <td>0.595129</td>\n",
" <td>0.720760</td>\n",
" <td>0.383248</td>\n",
" <td>-0.356977</td>\n",
" <td>0.603800</td>\n",
" <td>-0.483725</td>\n",
" </tr>\n",
" <tr>\n",
" <th>x4</th>\n",
" <td>-0.055892</td>\n",
" <td>-0.042697</td>\n",
" <td>0.062938</td>\n",
" <td>1.000000</td>\n",
" <td>0.091203</td>\n",
" <td>0.091251</td>\n",
" <td>0.086518</td>\n",
" <td>-0.099176</td>\n",
" <td>-0.007368</td>\n",
" <td>-0.035587</td>\n",
" <td>-0.121515</td>\n",
" <td>0.048788</td>\n",
" <td>-0.053929</td>\n",
" <td>0.175260</td>\n",
" </tr>\n",
" <tr>\n",
" <th>x5</th>\n",
" <td>0.420972</td>\n",
" <td>-0.516604</td>\n",
" <td>0.763651</td>\n",
" <td>0.091203</td>\n",
" <td>1.000000</td>\n",
" <td>-0.302188</td>\n",
" <td>0.731470</td>\n",
" <td>-0.769230</td>\n",
" <td>0.611441</td>\n",
" <td>0.668023</td>\n",
" <td>0.188933</td>\n",
" <td>-0.380051</td>\n",
" <td>0.590879</td>\n",
" <td>-0.427321</td>\n",
" </tr>\n",
" <tr>\n",
" <th>x6</th>\n",
" <td>-0.219247</td>\n",
" <td>0.311991</td>\n",
" <td>-0.391676</td>\n",
" <td>0.091251</td>\n",
" <td>-0.302188</td>\n",
" <td>1.000000</td>\n",
" <td>-0.240265</td>\n",
" <td>0.205246</td>\n",
" <td>-0.209847</td>\n",
" <td>-0.292048</td>\n",
" <td>-0.355501</td>\n",
" <td>0.128069</td>\n",
" <td>-0.613808</td>\n",
" <td>0.695360</td>\n",
" </tr>\n",
" <tr>\n",
" <th>x7</th>\n",
" <td>0.352734</td>\n",
" <td>-0.569537</td>\n",
" <td>0.644779</td>\n",
" <td>0.086518</td>\n",
" <td>0.731470</td>\n",
" <td>-0.240265</td>\n",
" <td>1.000000</td>\n",
" <td>-0.747881</td>\n",
" <td>0.456022</td>\n",
" <td>0.506456</td>\n",
" <td>0.261515</td>\n",
" <td>-0.273534</td>\n",
" <td>0.602339</td>\n",
" <td>-0.376955</td>\n",
" </tr>\n",
" <tr>\n",
" <th>x8</th>\n",
" <td>-0.379670</td>\n",
" <td>0.664408</td>\n",
" <td>-0.708027</td>\n",
" <td>-0.099176</td>\n",
" <td>-0.769230</td>\n",
" <td>0.205246</td>\n",
" <td>-0.747881</td>\n",
" <td>1.000000</td>\n",
" <td>-0.494588</td>\n",
" <td>-0.534432</td>\n",
" <td>-0.232471</td>\n",
" <td>0.291512</td>\n",
" <td>-0.496996</td>\n",
" <td>0.249929</td>\n",
" </tr>\n",
" <tr>\n",
" <th>x9</th>\n",
" <td>0.625505</td>\n",
" <td>-0.311948</td>\n",
" <td>0.595129</td>\n",
" <td>-0.007368</td>\n",
" <td>0.611441</td>\n",
" <td>-0.209847</td>\n",
" <td>0.456022</td>\n",
" <td>-0.494588</td>\n",
" <td>1.000000</td>\n",
" <td>0.910228</td>\n",
" <td>0.464741</td>\n",
" <td>-0.444413</td>\n",
" <td>0.488676</td>\n",
" <td>-0.381626</td>\n",
" </tr>\n",
" <tr>\n",
" <th>x10</th>\n",
" <td>0.582764</td>\n",
" <td>-0.314563</td>\n",
" <td>0.720760</td>\n",
" <td>-0.035587</td>\n",
" <td>0.668023</td>\n",
" <td>-0.292048</td>\n",
" <td>0.506456</td>\n",
" <td>-0.534432</td>\n",
" <td>0.910228</td>\n",
" <td>1.000000</td>\n",
" <td>0.460853</td>\n",
" <td>-0.441808</td>\n",
" <td>0.543993</td>\n",
" <td>-0.468536</td>\n",
" </tr>\n",
" <tr>\n",
" <th>x11</th>\n",
" <td>0.289946</td>\n",
" <td>-0.391679</td>\n",
" <td>0.383248</td>\n",
" <td>-0.121515</td>\n",
" <td>0.188933</td>\n",
" <td>-0.355501</td>\n",
" <td>0.261515</td>\n",
" <td>-0.232471</td>\n",
" <td>0.464741</td>\n",
" <td>0.460853</td>\n",
" <td>1.000000</td>\n",
" <td>-0.177383</td>\n",
" <td>0.374044</td>\n",
" <td>-0.507787</td>\n",
" </tr>\n",
" <tr>\n",
" <th>x12</th>\n",
" <td>-0.385064</td>\n",
" <td>0.175520</td>\n",
" <td>-0.356977</td>\n",
" <td>0.048788</td>\n",
" <td>-0.380051</td>\n",
" <td>0.128069</td>\n",
" <td>-0.273534</td>\n",
" <td>0.291512</td>\n",
" <td>-0.444413</td>\n",
" <td>-0.441808</td>\n",
" <td>-0.177383</td>\n",
" <td>1.000000</td>\n",
" <td>-0.366087</td>\n",
" <td>0.333461</td>\n",
" </tr>\n",
" <tr>\n",
" <th>x13</th>\n",
" <td>0.455621</td>\n",
" <td>-0.412995</td>\n",
" <td>0.603800</td>\n",
" <td>-0.053929</td>\n",
" <td>0.590879</td>\n",
" <td>-0.613808</td>\n",
" <td>0.602339</td>\n",
" <td>-0.496996</td>\n",
" <td>0.488676</td>\n",
" <td>0.543993</td>\n",
" <td>0.374044</td>\n",
" <td>-0.366087</td>\n",
" <td>1.000000</td>\n",
" <td>-0.737663</td>\n",
" </tr>\n",
" <tr>\n",
" <th>y</th>\n",
" <td>-0.388305</td>\n",
" <td>0.360445</td>\n",
" <td>-0.483725</td>\n",
" <td>0.175260</td>\n",
" <td>-0.427321</td>\n",
" <td>0.695360</td>\n",
" <td>-0.376955</td>\n",
" <td>0.249929</td>\n",
" <td>-0.381626</td>\n",
" <td>-0.468536</td>\n",
" <td>-0.507787</td>\n",
" <td>0.333461</td>\n",
" <td>-0.737663</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" x1 x2 x3 x4 x5 x6 x7 \\\n",
"x1 1.000000 -0.200469 0.406583 -0.055892 0.420972 -0.219247 0.352734 \n",
"x2 -0.200469 1.000000 -0.533828 -0.042697 -0.516604 0.311991 -0.569537 \n",
"x3 0.406583 -0.533828 1.000000 0.062938 0.763651 -0.391676 0.644779 \n",
"x4 -0.055892 -0.042697 0.062938 1.000000 0.091203 0.091251 0.086518 \n",
"x5 0.420972 -0.516604 0.763651 0.091203 1.000000 -0.302188 0.731470 \n",
"x6 -0.219247 0.311991 -0.391676 0.091251 -0.302188 1.000000 -0.240265 \n",
"x7 0.352734 -0.569537 0.644779 0.086518 0.731470 -0.240265 1.000000 \n",
"x8 -0.379670 0.664408 -0.708027 -0.099176 -0.769230 0.205246 -0.747881 \n",
"x9 0.625505 -0.311948 0.595129 -0.007368 0.611441 -0.209847 0.456022 \n",
"x10 0.582764 -0.314563 0.720760 -0.035587 0.668023 -0.292048 0.506456 \n",
"x11 0.289946 -0.391679 0.383248 -0.121515 0.188933 -0.355501 0.261515 \n",
"x12 -0.385064 0.175520 -0.356977 0.048788 -0.380051 0.128069 -0.273534 \n",
"x13 0.455621 -0.412995 0.603800 -0.053929 0.590879 -0.613808 0.602339 \n",
"y -0.388305 0.360445 -0.483725 0.175260 -0.427321 0.695360 -0.376955 \n",
"\n",
" x8 x9 x10 x11 x12 x13 y \n",
"x1 -0.379670 0.625505 0.582764 0.289946 -0.385064 0.455621 -0.388305 \n",
"x2 0.664408 -0.311948 -0.314563 -0.391679 0.175520 -0.412995 0.360445 \n",
"x3 -0.708027 0.595129 0.720760 0.383248 -0.356977 0.603800 -0.483725 \n",
"x4 -0.099176 -0.007368 -0.035587 -0.121515 0.048788 -0.053929 0.175260 \n",
"x5 -0.769230 0.611441 0.668023 0.188933 -0.380051 0.590879 -0.427321 \n",
"x6 0.205246 -0.209847 -0.292048 -0.355501 0.128069 -0.613808 0.695360 \n",
"x7 -0.747881 0.456022 0.506456 0.261515 -0.273534 0.602339 -0.376955 \n",
"x8 1.000000 -0.494588 -0.534432 -0.232471 0.291512 -0.496996 0.249929 \n",
"x9 -0.494588 1.000000 0.910228 0.464741 -0.444413 0.488676 -0.381626 \n",
"x10 -0.534432 0.910228 1.000000 0.460853 -0.441808 0.543993 -0.468536 \n",
"x11 -0.232471 0.464741 0.460853 1.000000 -0.177383 0.374044 -0.507787 \n",
"x12 0.291512 -0.444413 -0.441808 -0.177383 1.000000 -0.366087 0.333461 \n",
"x13 -0.496996 0.488676 0.543993 0.374044 -0.366087 1.000000 -0.737663 \n",
"y 0.249929 -0.381626 -0.468536 -0.507787 0.333461 -0.737663 1.000000 "
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 相関係数(correlation)の算出\n",
"df.corr()"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# 相関関係を目視で確認\n",
"# sns.pairplot(df)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x1</th>\n",
" <th>x2</th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x6</th>\n",
" <th>x7</th>\n",
" <th>x8</th>\n",
" <th>x9</th>\n",
" <th>x10</th>\n",
" <th>x11</th>\n",
" <th>x12</th>\n",
" <th>x13</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.00632</td>\n",
" <td>18.0</td>\n",
" <td>2.31</td>\n",
" <td>0</td>\n",
" <td>0.538</td>\n",
" <td>6.575</td>\n",
" <td>65.2</td>\n",
" <td>4.0900</td>\n",
" <td>1</td>\n",
" <td>296</td>\n",
" <td>15.3</td>\n",
" <td>396.90</td>\n",
" <td>4.98</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.02731</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>6.421</td>\n",
" <td>78.9</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>396.90</td>\n",
" <td>9.14</td>\n",
" <td>21.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.02729</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>7.185</td>\n",
" <td>61.1</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>392.83</td>\n",
" <td>4.03</td>\n",
" <td>34.7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 \\\n",
"0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 \n",
"1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 \n",
"2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 \n",
"\n",
" x13 y \n",
"0 4.98 24.0 \n",
"1 9.14 21.6 \n",
"2 4.03 34.7 "
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# df.iloc[行, 列]\n",
"X = df.iloc[:, :-1]\n",
"y = df.iloc[:, -1]"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# モデル構築と検証\n",
"from sklearn.linear_model import LinearRegression"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = LinearRegression()\n",
"model.fit(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.74064266410940938"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 検証(決定係数の計算)\n",
"model.score(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ -1.08011358e-01 4.64204584e-02 2.05586264e-02 2.68673382e+00\n",
" -1.77666112e+01 3.80986521e+00 6.92224640e-04 -1.47556685e+00\n",
" 3.06049479e-01 -1.23345939e-02 -9.52747232e-01 9.31168327e-03\n",
" -5.24758378e-01]\n",
"36.4594883851\n"
]
}
],
"source": [
"print(model.coef_)\n",
"print(model.intercept_)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 訓練データ(train)と検証データ(test)\n",
"# \n",
"# 例)受験勉強用 10 年分の過去問を購入\n",
"# - 駄目なケース\n",
"# - 10 年分で勉強(学習)\n",
"# - 10 年分で実力テスト(検証)\n",
"# - 良いケース\n",
"# - 前半の 5 年分で勉強(学習) ← 訓練データ\n",
"# - 後半の 5 年分で実力テスト(検証) ← 検証データ"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 訓練データと検証データの分割\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# モデルの学習\n",
"model.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.720905667266176"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 検証 ← 検証データ\n",
"model.score(X_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.74683165201406265"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 検証 ← 訓練データ\n",
"model.score(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"x1 0.00632\n",
"x2 18.00000\n",
"x3 2.31000\n",
"x4 0.00000\n",
"x5 0.53800\n",
"x6 6.57500\n",
"x7 65.20000\n",
"x8 4.09000\n",
"x9 1.00000\n",
"x10 296.00000\n",
"x11 15.30000\n",
"x12 396.90000\n",
"x13 4.98000\n",
"Name: 0, dtype: float64"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 予測値の計算\n",
"x = X.iloc[0, :]\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 29.42368847])"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_pred = model.predict([x])\n",
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# モデルの保存\n",
"from sklearn.externals import joblib"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['model.pkl']"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joblib.dump(model, 'model.pkl')"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 29.42368847])"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# モデルの読み込み\n",
"model_new = joblib.load('model.pkl')\n",
"model_new.predict([x])"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ -8.95714048e-02, 6.73132853e-02, 5.04649248e-02,\n",
" 2.18579583e+00, -1.72053975e+01, 3.63606995e+00,\n",
" 2.05579939e-03, -1.36602886e+00, 2.89576718e-01,\n",
" -1.22700072e-02, -8.34881849e-01, 9.40360790e-03,\n",
" -5.04008320e-01])"
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# パラメータの確認\n",
"# この変数が重要なのかどうかが判別できる\n",
"model.coef_"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ -0.09 , 0.067, 0.05 , 2.186, -17.205, 3.636, 0.002,\n",
" -1.366, 0.29 , -0.012, -0.835, 0.009, -0.504])"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.set_printoptions(precision=3, suppress=True) # 指数表記での表示を抑制\n",
"model.coef_"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x1</th>\n",
" <th>x2</th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x6</th>\n",
" <th>x7</th>\n",
" <th>x8</th>\n",
" <th>x9</th>\n",
" <th>x10</th>\n",
" <th>x11</th>\n",
" <th>x12</th>\n",
" <th>x13</th>\n",
" <th>y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.00632</td>\n",
" <td>18.0</td>\n",
" <td>2.31</td>\n",
" <td>0</td>\n",
" <td>0.538</td>\n",
" <td>6.575</td>\n",
" <td>65.2</td>\n",
" <td>4.0900</td>\n",
" <td>1</td>\n",
" <td>296</td>\n",
" <td>15.3</td>\n",
" <td>396.90</td>\n",
" <td>4.98</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.02731</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>6.421</td>\n",
" <td>78.9</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>396.90</td>\n",
" <td>9.14</td>\n",
" <td>21.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.02729</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>7.185</td>\n",
" <td>61.1</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>392.83</td>\n",
" <td>4.03</td>\n",
" <td>34.7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 \\\n",
"0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 \n",
"1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 \n",
"2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 \n",
"\n",
" x13 y \n",
"0 4.98 24.0 \n",
"1 9.14 21.6 \n",
"2 4.03 34.7 "
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 重み(パラメータw)をみるだけでは、どの変数が影響を与えているかわからない(変数ごとにスケールが異なるため)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@rakuishi
Copy link
Author

rakuishi commented Mar 3, 2018

pairplot

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment