rakuishi/20180225_multiple-linear-regression_1.ipynb

## 20180225_multiple-linear-regression_1.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 行列演算の基礎"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$\\boldsymbol{w} = (\\boldsymbol{X}^{T}\\boldsymbol{X})^{-1}\\boldsymbol{X}^{T}\\boldsymbol{y}$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- ベクトルの定義\n",
    "- 行列の定義\n",
    "- 転置\n",
    "- 逆行列\n",
    "- 行列積"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1]\n",
      " [2]\n",
      " [3]]\n"
     ]
    }
   ],
   "source": [
    "# ベクトルの定義\n",
    "x = np.array([[1], [2], [3]])\n",
    "print(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2]\n",
      " [3 4]]\n"
     ]
    }
   ],
   "source": [
    "# 行列の定義\n",
    "X = np.array([[1, 2], [3, 4]])\n",
    "print(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 3]\n",
      " [2 4]]\n"
     ]
    }
   ],
   "source": [
    "# 転置\n",
    "Xt = X.T\n",
    "print(Xt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[-2.   1. ]\n",
      " [ 1.5 -0.5]]\n"
     ]
    }
   ],
   "source": [
    "# 逆行列\n",
    "# linear algebra: 線形代数\n",
    "X_inv = np.linalg.inv(X)\n",
    "print(X_inv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[  1.00000000e+00   1.11022302e-16]\n",
      " [  0.00000000e+00   1.00000000e+00]]\n"
     ]
    }
   ],
   "source": [
    "# 行列積\n",
    "XX_inv = np.dot(X, X_inv)\n",
    "print(XX_inv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1],\n",
       "       [2],\n",
       "       [3]])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1, 2, 3]])"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[2 3 4]\n",
      " [1 2 3]]\n"
     ]
    }
   ],
   "source": [
    "X = np.array([\n",
    "    [2, 3, 4],\n",
    "    [1, 2, 3],\n",
    "])\n",
    "print(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2, 3)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "row, col = X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 3\n"
     ]
    }
   ],
   "source": [
    "print(row, col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2 3 4]\n",
      "[1 2 3]\n"
     ]
    }
   ],
   "source": [
    "for x in X:\n",
    "    print(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 演習問題"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$\\boldsymbol{w} = (\\boldsymbol{X}^{T}\\boldsymbol{X})^{-1}\\boldsymbol{X}^{T}\\boldsymbol{y}$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 2 3]\n",
      " [1 2 5]\n",
      " [1 3 4]\n",
      " [1 5 9]]\n",
      "[[1]\n",
      " [5]\n",
      " [6]\n",
      " [8]]\n"
     ]
    }
   ],
   "source": [
    "X = np.array([\n",
    "    [1, 2, 3],\n",
    "    [1, 2, 5],\n",
    "    [1, 3, 4],\n",
    "    [1, 5, 9],\n",
    "])\n",
    "print(X)\n",
    "\n",
    "y = np.array([\n",
    "    [1],\n",
    "    [5],\n",
    "    [6],\n",
    "    [8],\n",
    "])\n",
    "print(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[  4  12  21]\n",
      " [ 12  42  73]\n",
      " [ 21  73 131]]\n"
     ]
    }
   ],
   "source": [
    "Xt = X.T\n",
    "XtX = np.dot(Xt, X)\n",
    "print(XtX)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 1.76530612 -0.39795918 -0.06122449]\n",
      " [-0.39795918  0.84693878 -0.40816327]\n",
      " [-0.06122449 -0.40816327  0.24489796]]\n"
     ]
    }
   ],
   "source": [
    "XtX_inv = np.linalg.inv(XtX)\n",
    "print(XtX_inv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 20]\n",
      " [ 70]\n",
      " [124]]\n"
     ]
    }
   ],
   "source": [
    "Xty = np.dot(Xt, y)\n",
    "print(Xty)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[-0.14285714]\n",
      " [ 0.71428571]\n",
      " [ 0.57142857]]\n"
     ]
    }
   ],
   "source": [
    "w = np.dot(XtX_inv, Xty)\n",
    "print(w)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Scikit-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# モデルの宣言\n",
    "model = LinearRegression()\n",
    "\n",
    "# モデルの学習 ← パラメータの調整\n",
    "model.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.        ,  0.71428571,  0.57142857]])"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 調整後のパラメータ\n",
    "model.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0.14285714])"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.intercept_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.69230769230769229"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 予測精度 ← 決定係数\n",
    "model.score(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 3.]]\n"
     ]
    }
   ],
   "source": [
    "# 予測値の計算\n",
    "x = np.array([[1, 2, 3]])\n",
    "y_pred = model.predict(x)\n",
    "print(y_pred)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

## 20180225_multiple-linear-regression_2.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              20180225_multiple-linear-regression_2.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## 20180301_multiple-linear-regression_3.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              20180301_multiple-linear-regression_3.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# 行列演算の基礎"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"$\\boldsymbol{w} = (\\boldsymbol{X}^{T}\\boldsymbol{X})^{-1}\\boldsymbol{X}^{T}\\boldsymbol{y}$"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"- ベクトルの定義\n",
	"- 行列の定義\n",
	"- 転置\n",
	"- 逆行列\n",
	"- 行列積"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[1]\n",
	" [2]\n",
	" [3]]\n"
	]
	}
	],
	"source": [
	"# ベクトルの定義\n",
	"x = np.array([[1], [2], [3]])\n",
	"print(x)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[1 2]\n",
	" [3 4]]\n"
	]
	}
	],
	"source": [
	"# 行列の定義\n",
	"X = np.array([[1, 2], [3, 4]])\n",
	"print(X)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[1 3]\n",
	" [2 4]]\n"
	]
	}
	],
	"source": [
	"# 転置\n",
	"Xt = X.T\n",
	"print(Xt)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[-2. 1. ]\n",
	" [ 1.5 -0.5]]\n"
	]
	}
	],
	"source": [
	"# 逆行列\n",
	"# linear algebra: 線形代数\n",
	"X_inv = np.linalg.inv(X)\n",
	"print(X_inv)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[ 1.00000000e+00 1.11022302e-16]\n",
	" [ 0.00000000e+00 1.00000000e+00]]\n"
	]
	}
	],
	"source": [
	"# 行列積\n",
	"XX_inv = np.dot(X, X_inv)\n",
	"print(XX_inv)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([[1],\n",
	" [2],\n",
	" [3]])"
	]
	},
	"execution_count": 17,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"x"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([[1, 2, 3]])"
	]
	},
	"execution_count": 18,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"x.T"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[2 3 4]\n",
	" [1 2 3]]\n"
	]
	}
	],
	"source": [
	"X = np.array([\n",
	" [2, 3, 4],\n",
	" [1, 2, 3],\n",
	"])\n",
	"print(X)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(2, 3)"
	]
	},
	"execution_count": 20,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"X.shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"row, col = X.shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"2 3\n"
	]
	}
	],
	"source": [
	"print(row, col)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[2 3 4]\n",
	"[1 2 3]\n"
	]
	}
	],
	"source": [
	"for x in X:\n",
	" print(x)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# 演習問題"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"$\\boldsymbol{w} = (\\boldsymbol{X}^{T}\\boldsymbol{X})^{-1}\\boldsymbol{X}^{T}\\boldsymbol{y}$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[1 2 3]\n",
	" [1 2 5]\n",
	" [1 3 4]\n",
	" [1 5 9]]\n",
	"[[1]\n",
	" [5]\n",
	" [6]\n",
	" [8]]\n"
	]
	}
	],
	"source": [
	"X = np.array([\n",
	" [1, 2, 3],\n",
	" [1, 2, 5],\n",
	" [1, 3, 4],\n",
	" [1, 5, 9],\n",
	"])\n",
	"print(X)\n",
	"\n",
	"y = np.array([\n",
	" [1],\n",
	" [5],\n",
	" [6],\n",
	" [8],\n",
	"])\n",
	"print(y)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 30,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[ 4 12 21]\n",
	" [ 12 42 73]\n",
	" [ 21 73 131]]\n"
	]
	}
	],
	"source": [
	"Xt = X.T\n",
	"XtX = np.dot(Xt, X)\n",
	"print(XtX)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 31,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[ 1.76530612 -0.39795918 -0.06122449]\n",
	" [-0.39795918 0.84693878 -0.40816327]\n",
	" [-0.06122449 -0.40816327 0.24489796]]\n"
	]
	}
	],
	"source": [
	"XtX_inv = np.linalg.inv(XtX)\n",
	"print(XtX_inv)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 32,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[ 20]\n",
	" [ 70]\n",
	" [124]]\n"
	]
	}
	],
	"source": [
	"Xty = np.dot(Xt, y)\n",
	"print(Xty)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 33,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[-0.14285714]\n",
	" [ 0.71428571]\n",
	" [ 0.57142857]]\n"
	]
	}
	],
	"source": [
	"w = np.dot(XtX_inv, Xty)\n",
	"print(w)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Scikit-learn"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 35,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"from sklearn.linear_model import LinearRegression"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 36,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
	]
	},
	"execution_count": 36,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# モデルの宣言\n",
	"model = LinearRegression()\n",
	"\n",
	"# モデルの学習 ← パラメータの調整\n",
	"model.fit(X, y)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 37,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([[ 0. , 0.71428571, 0.57142857]])"
	]
	},
	"execution_count": 37,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# 調整後のパラメータ\n",
	"model.coef_"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 38,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([-0.14285714])"
	]
	},
	"execution_count": 38,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"model.intercept_"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 39,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.69230769230769229"
	]
	},
	"execution_count": 39,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# 予測精度 ← 決定係数\n",
	"model.score(X, y)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 42,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[ 3.]]\n"
	]
	}
	],
	"source": [
	"# 予測値の計算\n",
	"x = np.array([[1, 2, 3]])\n",
	"y_pred = model.predict(x)\n",
	"print(y_pred)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}