Skip to content

Instantly share code, notes, and snippets.

@RottenFruits
Created May 20, 2018 10:17
Show Gist options
  • Save RottenFruits/4cf03ad9c6098d1895c508d35127ff17 to your computer and use it in GitHub Desktop.
Save RottenFruits/4cf03ad9c6098d1895c508d35127ff17 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import random"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# MF"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class MatrixFactorization(object):\n",
"\n",
" def __init__(self, K=20, alpha=1e-6, beta = 0.0):\n",
" self.K = K \n",
" self.alpha = alpha\n",
" self.beta = beta\n",
"\n",
"\n",
" def fit(self, X, n_user, n_item, n_iter = 100):\n",
" self.R = X.copy()\n",
" self.samples = X.copy()\n",
"\n",
" self.user_factors = np.random.rand(n_user, self.K)\n",
" self.item_factors = np.random.rand(n_item, self.K)\n",
" \n",
" #stochastic gradient descent \n",
" self.loss = []\n",
" for i in range(n_iter):\n",
" self.sgd()\n",
" mse = self.mse()\n",
" self.loss.append((i, mse)) \n",
" \n",
" def sgd(self):\n",
" np.random.shuffle(self.samples)\n",
" for user, item, rating in self.samples:\n",
" err = rating - self.predict_pair(user, item) \n",
" \n",
" # update parameter\n",
" self.user_factors[user] += self.alpha * (err * self.item_factors[item] - self.beta * self.user_factors[user])\n",
" self.item_factors[item] += self.alpha * (err * self.user_factors[user] - self.beta * self.item_factors[item]) \n",
" \n",
" def mse(self):\n",
" predicted = self.predict(self.R)\n",
" error = np.hstack((self.R, np.array(predicted).reshape(-1, 1)))\n",
" error = np.sqrt(pow((error[:, 2] - error[:, 3]), 2).mean())\n",
" return error\n",
" \n",
" def predict_pair(self, user, item):\n",
" return np.inner(self.user_factors[user], self.item_factors[item])\n",
" \n",
" def predict(self, X):\n",
" rate = []\n",
" for row in X:\n",
" rate.append(self.predict_pair(row[0], row[1])) \n",
" return rate\n",
" \n",
" def get_full_matrix(self):\n",
" return np.inner(self.user_factors, self.item_factors)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Bias MF"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class BiasMatrixFactorization(object):\n",
"\n",
" def __init__(self, K=20, alpha=1e-6, beta = 0.0):\n",
" self.K = K \n",
" self.alpha = alpha\n",
" self.beta = beta\n",
"\n",
" \n",
" def fit(self, X, n_user, n_item, n_iter = 100):\n",
" self.R = X.copy()\n",
" self.samples = X.copy()\n",
"\n",
" self.user_factors = np.random.rand(n_user, self.K)\n",
" self.item_factors = np.random.rand(n_item, self.K)\n",
" \n",
" self.bias_u = np.zeros(n_user)\n",
" self.bias_i = np.zeros(n_item)\n",
" self.bias = np.mean(X[:, 2])\n",
" \n",
" #stochastic gradient descent \n",
" self.loss = []\n",
" for i in range(n_iter):\n",
" self.sgd()\n",
" mse = self.mse()\n",
" self.loss.append((i, mse))\n",
" \n",
" def sgd(self):\n",
" np.random.shuffle(self.samples)\n",
" for user, item, rating in self.samples:\n",
" err = rating - self.predict_pair(user, item)\n",
" \n",
" # update parameter\n",
" self.bias_u[user] += self.alpha * (err - self.beta * self.bias_u[user])\n",
" self.bias_i[item] += self.alpha * (err - self.beta * self.bias_i[item])\n",
" \n",
" self.user_factors[user] += self.alpha * (err * self.item_factors[item] - self.beta * self.user_factors[user])\n",
" self.item_factors[item] += self.alpha * (err * self.user_factors[user] - self.beta * self.item_factors[item]) \n",
" \n",
" def mse(self):\n",
" predicted = self.predict(self.R)\n",
" error = np.hstack((self.R, np.array(predicted).reshape(-1, 1)))\n",
" error = np.sqrt(pow((error[:, 2] - error[:, 3]), 2).mean())\n",
" return error\n",
" \n",
" def predict_pair(self, user, item):\n",
" return self.bias + self.bias_u[user] + self.bias_i[item] + np.inner(self.user_factors[user], self.item_factors[item])\n",
" \n",
" def predict(self, X):\n",
" rate = []\n",
" for row in X:\n",
" rate.append(self.predict_pair(row[0], row[1])) \n",
" return rate\n",
" \n",
" def get_full_matrix(self):\n",
" return self.bias + self.bias_u.reshape(-1, 1) + self.bias_i + np.inner(self.user_factors, self.item_factors)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# load data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"def load_ml100k():\n",
" samples = pd.read_csv('data/ml-100k/u.data', sep = '\\t', header=None)\n",
" \n",
" samples = samples.iloc[:, :3]\n",
" samples.columns = ['user', 'item', 'rate']\n",
" \n",
" samples['user'] = samples['user'] - 1\n",
" samples['item'] = samples['item'] - 1\n",
" \n",
" return samples"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# main"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = np.array(load_ml100k())\n",
"\n",
"n_user = np.unique(df[:, 0]).max() + 1\n",
"n_item = np.unique(df[:, 1]).max() + 1\n",
"n_rate = np.unique(df[:, 2]).max()\n",
"\n",
"random.shuffle(df)\n",
"train_size = int(df.shape[0] * 0.8)\n",
"train_df = df[:train_size]\n",
"test_df = df[train_size:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Matrix Factorization\n",
"MF = MatrixFactorization(K = 20, alpha = 0.01, beta = 0.5)\n",
"MF.fit(train_df, n_user, n_item, n_iter = 10)\n",
"\n",
"pre = MF.predict(test_df)\n",
"ret1 = np.hstack((test_df, np.array(pre).reshape(-1, 1)))\n",
"np.sqrt(pow((ret1[:, 2] - ret1[:, 3]), 2).mean())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Bias Matrix Factorization\n",
"BMF = BiasMatrixFactorization(K=20, alpha = 0.01, beta = 0.5)\n",
"BMF.fit(train_df, n_user, n_item, n_iter = 10)\n",
"\n",
"pre2 = BMF.predict(test_df[:, :2])\n",
"ret2 =np.hstack((test_df, np.array(pre2).reshape(-1, 1)))\n",
"np.sqrt(pow((ret2[:, 2] - ret2[:, 3]), 2).mean())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment