ftnext/アイテムベースの協調フィルタリング.ipynb

## アイテムベースの協調フィルタリング.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              アイテムベースの協調フィルタリング.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## ユーザベースの協調フィルタリング.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              ユーザベースの協調フィルタリング.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## 協調フィルタリングを手を動かして理解する.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 評価値\n",
    "\n",
    "<table>\n",
    "    <tr>\n",
    "        <th></th>\n",
    "        <th>アイテム1</th>\n",
    "        <th>アイテム2</th>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザA</td>\n",
    "        <td>5</td>\n",
    "        <td>3</td>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザB</td>\n",
    "        <td>2</td>\n",
    "        <td>5</td>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザC</td>\n",
    "        <td>1</td>\n",
    "        <td>4</td>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザD</td>\n",
    "        <td>5</td>\n",
    "        <td>2</td>\n",
    "    </tr>\n",
    "</table>\n",
    "\n",
    "例：ユーザAはアイテム1を☆5と評価した"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[5 3]\n",
      " [2 5]\n",
      " [1 4]\n",
      " [5 2]]\n"
     ]
    }
   ],
   "source": [
    "mat = np.array([\n",
    "    [5, 3],\n",
    "    [2, 5],\n",
    "    [1, 4],\n",
    "    [5, 2]\n",
    "])\n",
    "print(mat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([5, 3])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 行の取り出し (mat[0] はユーザAの評価値)\n",
    "mat[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 5])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mat[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 3, -2])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 差\n",
    "mat[0]-mat[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.605551275463989"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ユークリッド距離の計算\n",
    "np.linalg.norm(mat[0]-mat[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4.123105625617661"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.linalg.norm(mat[0]-mat[2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.linalg.norm(mat[0]-mat[3])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 評価値(=好み)の距離の行列\n",
    "\n",
    "ここでは評価値を2次元の座標のように見て「距離」を計算している。\n",
    "\n",
    "<table>\n",
    "    <tr>\n",
    "        <th></th>\n",
    "        <th>ユーザA</th>\n",
    "        <th>ユーザB</th>\n",
    "        <th>ユーザC</th>\n",
    "        <th>ユーザD</th>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザA</td>\n",
    "        <td>0</td>\n",
    "        <td></td>\n",
    "        <td></td>\n",
    "        <td></td>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザB</td>\n",
    "        <td></td>\n",
    "        <td>0</td>\n",
    "        <td></td>\n",
    "        <td></td>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザC</td>\n",
    "        <td></td>\n",
    "        <td></td>\n",
    "        <td>0</td>\n",
    "        <td></td>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザD</td>\n",
    "        <td></td>\n",
    "        <td></td>\n",
    "        <td></td>\n",
    "        <td>0</td>\n",
    "    </tr>\n",
    "</table>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.   3.61 4.12 1.  ]\n",
      " [3.61 0.   1.41 4.24]\n",
      " [4.12 1.41 0.   4.47]\n",
      " [1.   4.24 4.47 0.  ]]\n"
     ]
    }
   ],
   "source": [
    "# 内包表記でかっこよく書き換え\n",
    "distance_rows = []\n",
    "for i in range(0, len(mat)):\n",
    "    row = [round(np.linalg.norm(mat[i]-mat[j]), 2) for j in range(0, len(mat))]\n",
    "    distance_rows.append(row)\n",
    "\n",
    "dist_mat = np.array(distance_rows)\n",
    "print(dist_mat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.   3.61 4.12 1.  ]\n",
      " [3.61 0.   1.41 4.24]\n",
      " [4.12 1.41 0.   4.47]\n",
      " [1.   4.24 4.47 0.  ]]\n"
     ]
    }
   ],
   "source": [
    "distance_rows = []\n",
    "for i in range(0, len(mat)):\n",
    "    row = []\n",
    "    for j in range(0, len(mat)):\n",
    "        distance = np.linalg.norm(mat[i]-mat[j])\n",
    "        row.append(round(distance, 2))\n",
    "    distance_rows.append(row)\n",
    "\n",
    "dist_mat = np.array(distance_rows)\n",
    "print(dist_mat)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 評価値(=好み)の類似度の行列\n",
    "\n",
    "$Score = \\frac{1}{1+距離}$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1.   0.22 0.2  0.5 ]\n",
      " [0.22 1.   0.41 0.19]\n",
      " [0.2  0.41 1.   0.18]\n",
      " [0.5  0.19 0.18 1.  ]]\n"
     ]
    }
   ],
   "source": [
    "score_rows = []\n",
    "for i in range(0, len(mat)):\n",
    "    row = []\n",
    "    for j in range(0, len(mat)):\n",
    "        distance = np.linalg.norm(mat[i]-mat[j])\n",
    "        score = 1 / (1 + round(distance, 2))\n",
    "        row.append(round(score, 2))\n",
    "    score_rows.append(row)\n",
    "\n",
    "score_mat = np.array(score_rows)\n",
    "print(score_mat)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 類似度行列をもとにスコア付け\n",
    "\n",
    "ユーザDについてA,B,Cの評価と類似度を使ってどのアイテムをおすすめするか考える\n",
    "\n",
    "<table>\n",
    "    <tr>\n",
    "        <th></th>\n",
    "        <th>アイテム3</th>\n",
    "        <th>アイテム4</th>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザA</td>\n",
    "        <td>5</td>\n",
    "        <td>-</td>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザB</td>\n",
    "        <td>1</td>\n",
    "        <td>5</td>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザC</td>\n",
    "        <td>2</td>\n",
    "        <td>4</td>\n",
    "    </tr>\n",
    "</table>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 5. nan]\n",
      " [ 1.  5.]\n",
      " [ 2.  4.]]\n"
     ]
    }
   ],
   "source": [
    "mat2 = np.array([\n",
    "    [5, np.nan],\n",
    "    [1, 5],\n",
    "    [2, 4]\n",
    "])\n",
    "print(mat2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 4., nan])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# nanがあっても差は計算される\n",
    "mat2[0]-mat2[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "nan"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# nanがあるとnormはnanになる (つまり、nanがあるとユークリッド距離が計算できない)\n",
    "np.linalg.norm(mat2[0]-mat2[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.5 , 0.19, 0.18, 1.  ])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score_resemble_to_D = score_mat[3].copy()\n",
    "score_resemble_to_D"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "ITEM3 = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "重み付けスコア: 3.05\n",
      "類似度の合計: 0.8699999999999999\n",
      "正規化済み重み付けスコア 3.51\n"
     ]
    }
   ],
   "source": [
    "score_sum = 0.0\n",
    "resemble_sum = 0.0\n",
    "\n",
    "for i in range(0, 3):\n",
    "    # 自身との類似度は計算しない\n",
    "    if i == 3:\n",
    "        continue\n",
    "    # 評価していないアイテムの場合は類似度を計算しない\n",
    "    if np.isnan(mat2[i][ITEM3]):\n",
    "        continue\n",
    "    resemble_sum += score_resemble_to_D[i]\n",
    "    score_sum += score_resemble_to_D[i] * mat2[i][ITEM3]\n",
    "\n",
    "print('重み付けスコア:', score_sum)\n",
    "print('類似度の合計:', resemble_sum)\n",
    "normalized_score = score_sum / resemble_sum\n",
    "print('正規化済み重み付けスコア', round(normalized_score, 2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.5\n",
      "0.19\n",
      "0.36\n"
     ]
    }
   ],
   "source": [
    "for i in range(0, 3):\n",
    "    # 自身との類似度は計算しない\n",
    "    if i == 3:\n",
    "        continue\n",
    "    print(score_resemble_to_D[i] * mat2[i][ITEM3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "ITEM4 = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "重み付けスコア: 1.67\n",
      "類似度の合計: 0.37\n",
      "正規化済み重み付けスコア 4.51\n"
     ]
    }
   ],
   "source": [
    "score_sum = 0.0\n",
    "resemble_sum = 0.0\n",
    "\n",
    "for i in range(0, 3):\n",
    "    # 自身との類似度は計算しない\n",
    "    if i == 3:\n",
    "        continue\n",
    "    # 評価していないアイテムの場合は類似度を計算しない\n",
    "    if np.isnan(mat2[i][ITEM4]):\n",
    "        continue\n",
    "    resemble_sum += score_resemble_to_D[i]\n",
    "    score_sum += score_resemble_to_D[i] * mat2[i][ITEM4]\n",
    "\n",
    "print('重み付けスコア:', score_sum)\n",
    "print('類似度の合計:', resemble_sum)\n",
    "normalized_score = score_sum / resemble_sum\n",
    "print('正規化済み重み付けスコア', round(normalized_score, 2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 類似度行列をもとにスコア付け (まとめ)\n",
    "\n",
    "ユーザDについてA,B,Cの評価と類似度を使ってどのアイテムをおすすめするか考える\n",
    "\n",
    "→ユーザDにはアイテム4, アイテム3の順でおすすめする\n",
    "\n",
    "似ているユーザ1人がおすすめしているアイテムよりも、似ていないユーザ2人がおすすめしているアイテムがレコメンドされるらしい\n",
    "\n",
    "<table>\n",
    "    <tr>\n",
    "        <th></th>\n",
    "        <th>アイテム3</th>\n",
    "        <th>アイテム4</th>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザA</td>\n",
    "        <td>5</td>\n",
    "        <td>-</td>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザB</td>\n",
    "        <td>1</td>\n",
    "        <td>5</td>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザC</td>\n",
    "        <td>2</td>\n",
    "        <td>4</td>\n",
    "    </tr>\n",
    "    <tr>\n",
    "        <td>ユーザDへのスコア</td>\n",
    "        <td>3.51</td>\n",
    "        <td>4.51</td>\n",
    "    </tr>\n",
    "</table>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "関数化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def normalized_score(rating_mat, item, resemble_score, users, me):\n",
    "    \"\"\"Return the similarity-weighted average rating of one item, rounded to 2 decimals.\n",
    "\n",
    "    rating_mat: rating matrix indexed as rating_mat[user][item]; NaN marks an unrated item\n",
    "    item: column index of the item in rating_mat\n",
    "    resemble_score: similarity to the target user, indexed by user\n",
    "    users: number of leading rows of rating_mat to scan (indices 0 .. users-1)\n",
    "    me: index of the target user; that index is skipped\n",
    "\n",
    "    Returns NaN when no scanned user has rated the item, because the weighted\n",
    "    average is undefined when the similarity weights sum to zero.\n",
    "    \"\"\"\n",
    "    score_sum = 0.0\n",
    "    resemble_sum = 0.0\n",
    "    for i in range(users):\n",
    "        # Never weight the target user by the similarity to themselves\n",
    "        if i == me:\n",
    "            continue\n",
    "        rating = rating_mat[i][item]\n",
    "        # NaN means this user has not rated the item, so they contribute nothing\n",
    "        if np.isnan(rating):\n",
    "            continue\n",
    "        resemble_sum += resemble_score[i]\n",
    "        score_sum += resemble_score[i] * rating\n",
    "\n",
    "    print('重み付けスコア:', score_sum)\n",
    "    print('類似度の合計:', resemble_sum)\n",
    "    # Without this guard, 0.0 / 0.0 raises ZeroDivisionError\n",
    "    if resemble_sum == 0:\n",
    "        return np.nan\n",
    "    return round(score_sum / resemble_sum, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "重み付けスコア: 3.05\n",
      "類似度の合計: 0.8699999999999999\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "3.51"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "normalized_score(mat2, ITEM3, score_resemble_to_D, len(mat)-1, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "重み付けスコア: 1.67\n",
      "類似度の合計: 0.37\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "4.51"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "normalized_score(mat2, ITEM4, score_resemble_to_D, len(mat)-1, 3)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"import numpy as np"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 評価値\n",
	"\n",
	"<table>\n",
	" <tr>\n",
	" <th></th>\n",
	" <th>アイテム1</th>\n",
	" <th>アイテム2</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザA</td>\n",
	" <td>5</td>\n",
	" <td>3</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザB</td>\n",
	" <td>2</td>\n",
	" <td>5</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザC</td>\n",
	" <td>1</td>\n",
	" <td>4</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザD</td>\n",
	" <td>5</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	"</table>\n",
	"\n",
	"例：ユーザAはアイテム1を☆5と評価した"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[5 3]\n",
	" [2 5]\n",
	" [1 4]\n",
	" [5 2]]\n"
	]
	}
	],
	"source": [
	"mat = np.array([\n",
	" [5, 3],\n",
	" [2, 5],\n",
	" [1, 4],\n",
	" [5, 2]\n",
	"])\n",
	"print(mat)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([5, 3])"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# 行の取り出し (mat[0] はユーザAの評価値)\n",
	"mat[0]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([2, 5])"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"mat[1]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([ 3, -2])"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# 差\n",
	"mat[0]-mat[1]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"3.605551275463989"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# ユークリッド距離の計算\n",
	"np.linalg.norm(mat[0]-mat[1])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"4.123105625617661"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"np.linalg.norm(mat[0]-mat[2])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"1.0"
	]
	},
	"execution_count": 9,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"np.linalg.norm(mat[0]-mat[3])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 評価値(=好み)の距離の行列\n",
	"\n",
	"ここでは評価値を2次元の座標のように見て「距離」を計算している。\n",
	"\n",
	"<table>\n",
	" <tr>\n",
	" <th></th>\n",
	" <th>ユーザA</th>\n",
	" <th>ユーザB</th>\n",
	" <th>ユーザC</th>\n",
	" <th>ユーザD</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザA</td>\n",
	" <td>0</td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザB</td>\n",
	" <td></td>\n",
	" <td>0</td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザC</td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" <td>0</td>\n",
	" <td></td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザD</td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" <td></td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	"</table>"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[0. 3.61 4.12 1. ]\n",
	" [3.61 0. 1.41 4.24]\n",
	" [4.12 1.41 0. 4.47]\n",
	" [1. 4.24 4.47 0. ]]\n"
	]
	}
	],
	"source": [
	"# 内包表記でかっこよく書き換え\n",
	"distance_rows = []\n",
	"for i in range(0, len(mat)):\n",
	" row = [round(np.linalg.norm(mat[i]-mat[j]), 2) for j in range(0, len(mat))]\n",
	" distance_rows.append(row)\n",
	"\n",
	"dist_mat = np.array(distance_rows)\n",
	"print(dist_mat)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[0. 3.61 4.12 1. ]\n",
	" [3.61 0. 1.41 4.24]\n",
	" [4.12 1.41 0. 4.47]\n",
	" [1. 4.24 4.47 0. ]]\n"
	]
	}
	],
	"source": [
	"distance_rows = []\n",
	"for i in range(0, len(mat)):\n",
	" row = []\n",
	" for j in range(0, len(mat)):\n",
	" distance = np.linalg.norm(mat[i]-mat[j])\n",
	" row.append(round(distance, 2))\n",
	" distance_rows.append(row)\n",
	"\n",
	"dist_mat = np.array(distance_rows)\n",
	"print(dist_mat)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 評価値(=好み)の類似度の行列\n",
	"\n",
	"$Score = \\frac{1}{1+距離}$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[1. 0.22 0.2 0.5 ]\n",
	" [0.22 1. 0.41 0.19]\n",
	" [0.2 0.41 1. 0.18]\n",
	" [0.5 0.19 0.18 1. ]]\n"
	]
	}
	],
	"source": [
	"score_rows = []\n",
	"for i in range(0, len(mat)):\n",
	" row = []\n",
	" for j in range(0, len(mat)):\n",
	" distance = np.linalg.norm(mat[i]-mat[j])\n",
	" score = 1 / (1 + round(distance, 2))\n",
	" row.append(round(score, 2))\n",
	" score_rows.append(row)\n",
	"\n",
	"score_mat = np.array(score_rows)\n",
	"print(score_mat)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 類似度行列をもとにスコア付け\n",
	"\n",
	"ユーザDについてA,B,Cの評価と類似度を使ってどのアイテムをおすすめするか考える\n",
	"\n",
	"<table>\n",
	" <tr>\n",
	" <th></th>\n",
	" <th>アイテム3</th>\n",
	" <th>アイテム4</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザA</td>\n",
	" <td>5</td>\n",
	" <td>-</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザB</td>\n",
	" <td>1</td>\n",
	" <td>5</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザC</td>\n",
	" <td>2</td>\n",
	" <td>4</td>\n",
	" </tr>\n",
	"</table>"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[ 5. nan]\n",
	" [ 1. 5.]\n",
	" [ 2. 4.]]\n"
	]
	}
	],
	"source": [
	"mat2 = np.array([\n",
	" [5, np.nan],\n",
	" [1, 5],\n",
	" [2, 4]\n",
	"])\n",
	"print(mat2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([ 4., nan])"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# nanがあっても差は計算される\n",
	"mat2[0]-mat2[1]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"nan"
	]
	},
	"execution_count": 15,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# nanがあるとnormはnanになる (つまり、nanがあるとユークリッド距離が計算できない)\n",
	"np.linalg.norm(mat2[0]-mat2[1])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([0.5 , 0.19, 0.18, 1. ])"
	]
	},
	"execution_count": 16,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"score_resemble_to_D = score_mat[3].copy()\n",
	"score_resemble_to_D"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [],
	"source": [
	"ITEM3 = 0"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"重み付けスコア: 3.05\n",
	"類似度の合計: 0.8699999999999999\n",
	"正規化済み重み付けスコア 3.51\n"
	]
	}
	],
	"source": [
	"score_sum = 0.0\n",
	"resemble_sum = 0.0\n",
	"\n",
	"for i in range(0, 3):\n",
	" # 自身との類似度は計算しない\n",
	" if i == 3:\n",
	" continue\n",
	" # 評価していないアイテムの場合は類似度を計算しない\n",
	" if np.isnan(mat2[i][ITEM3]):\n",
	" continue\n",
	" resemble_sum += score_resemble_to_D[i]\n",
	" score_sum += score_resemble_to_D[i] * mat2[i][ITEM3]\n",
	"\n",
	"print('重み付けスコア:', score_sum)\n",
	"print('類似度の合計:', resemble_sum)\n",
	"normalized_score = score_sum / resemble_sum\n",
	"print('正規化済み重み付けスコア', round(normalized_score, 2))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"2.5\n",
	"0.19\n",
	"0.36\n"
	]
	}
	],
	"source": [
	"for i in range(0, 3):\n",
	" # 自身との類似度は計算しない\n",
	" if i == 3:\n",
	" continue\n",
	" print(score_resemble_to_D[i] * mat2[i][ITEM3])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [],
	"source": [
	"ITEM4 = 1"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"重み付けスコア: 1.67\n",
	"類似度の合計: 0.37\n",
	"正規化済み重み付けスコア 4.51\n"
	]
	}
	],
	"source": [
	"score_sum = 0.0\n",
	"resemble_sum = 0.0\n",
	"\n",
	"for i in range(0, 3):\n",
	" # 自身との類似度は計算しない\n",
	" if i == 3:\n",
	" continue\n",
	" # 評価していないアイテムの場合は類似度を計算しない\n",
	" if np.isnan(mat2[i][ITEM4]):\n",
	" continue\n",
	" resemble_sum += score_resemble_to_D[i]\n",
	" score_sum += score_resemble_to_D[i] * mat2[i][ITEM4]\n",
	"\n",
	"print('重み付けスコア:', score_sum)\n",
	"print('類似度の合計:', resemble_sum)\n",
	"normalized_score = score_sum / resemble_sum\n",
	"print('正規化済み重み付けスコア', round(normalized_score, 2))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### 類似度行列をもとにスコア付け (まとめ)\n",
	"\n",
	"ユーザDについてA,B,Cの評価と類似度を使ってどのアイテムをおすすめするか考える\n",
	"\n",
	"→ユーザDにはアイテム4, アイテム3の順でおすすめする\n",
	"\n",
	"似ているユーザ1人がおすすめしているアイテムよりも、似ていないユーザ2人がおすすめしているアイテムがレコメンドされるらしい\n",
	"\n",
	"<table>\n",
	" <tr>\n",
	" <th></th>\n",
	" <th>アイテム3</th>\n",
	" <th>アイテム4</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザA</td>\n",
	" <td>5</td>\n",
	" <td>-</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザB</td>\n",
	" <td>1</td>\n",
	" <td>5</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザC</td>\n",
	" <td>2</td>\n",
	" <td>4</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>ユーザDへのスコア</td>\n",
	" <td>3.51</td>\n",
	" <td>4.51</td>\n",
	" </tr>\n",
	"</table>"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"関数化"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [],
	"source": [
	"def normalized_score(rating_mat, item, resemble_score, users, me):\n",
	"    \"\"\"Return the similarity-weighted average rating of one item, rounded to 2 decimals.\n",
	"\n",
	"    rating_mat: rating matrix indexed as rating_mat[user][item]; NaN marks an unrated item\n",
	"    item: column index of the item in rating_mat\n",
	"    resemble_score: similarity to the target user, indexed by user\n",
	"    users: number of leading rows of rating_mat to scan (indices 0 .. users-1)\n",
	"    me: index of the target user; that index is skipped\n",
	"\n",
	"    Returns NaN when no scanned user has rated the item, because the weighted\n",
	"    average is undefined when the similarity weights sum to zero.\n",
	"    \"\"\"\n",
	"    score_sum = 0.0\n",
	"    resemble_sum = 0.0\n",
	"    for i in range(users):\n",
	"        # Never weight the target user by the similarity to themselves\n",
	"        if i == me:\n",
	"            continue\n",
	"        rating = rating_mat[i][item]\n",
	"        # NaN means this user has not rated the item, so they contribute nothing\n",
	"        if np.isnan(rating):\n",
	"            continue\n",
	"        resemble_sum += resemble_score[i]\n",
	"        score_sum += resemble_score[i] * rating\n",
	"\n",
	"    print('重み付けスコア:', score_sum)\n",
	"    print('類似度の合計:', resemble_sum)\n",
	"    # Without this guard, 0.0 / 0.0 raises ZeroDivisionError\n",
	"    if resemble_sum == 0:\n",
	"        return np.nan\n",
	"    return round(score_sum / resemble_sum, 2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"重み付けスコア: 3.05\n",
	"類似度の合計: 0.8699999999999999\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"3.51"
	]
	},
	"execution_count": 23,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"normalized_score(mat2, ITEM3, score_resemble_to_D, len(mat)-1, 3)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"重み付けスコア: 1.67\n",
	"類似度の合計: 0.37\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"4.51"
	]
	},
	"execution_count": 24,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"normalized_score(mat2, ITEM4, score_resemble_to_D, len(mat)-1, 3)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.1"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}