Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RottenFruits/8135b68eaaf2c50c60fe32c0af3d8253 to your computer and use it in GitHub Desktop.
Save RottenFruits/8135b68eaaf2c50c60fe32c0af3d8253 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# package"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"using Statistics"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# module"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Main.VariationalBayesianProbabilisticMatrixFactorization"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"module VariationalBayesianProbabilisticMatrixFactorization\n",
"\n",
"using LinearAlgebra\n",
"using Statistics\n",
"\n",
"# State of a variational Bayesian probabilistic matrix factorization model.\n",
"#\n",
"# Fields:\n",
"#   M  - ratings, one row per observation: (user id, item id, rating); the ids\n",
"#        are 0-based and are used as array indices after adding 1\n",
"#   I  - number of users (rows of U)\n",
"#   J  - number of items (rows of V)\n",
"#   n  - number of latent factors\n",
"#   U  - I x n user factors, overwritten by `fit`\n",
"#   V  - J x n item factors, overwritten by `fit`\n",
"#   τ² - noise variance; the constructor value is used for the first sweep\n",
"#   σ² - per-factor variances of the user factors, reset to ones by `fit`\n",
"#   ρ² - per-factor variances of the item factors, reset to ones by `fit`\n",
"#   L  - number of sweeps run by `fit`\n",
"mutable struct VariationalBayesianProbabilisticMatrixFactorizationModel\n",
"    M::Array\n",
"    I::Int64\n",
"    J::Int64\n",
"    n::Int64\n",
"    U::Array\n",
"    V::Array\n",
"    τ²::Float64\n",
"    σ²::Array\n",
"    ρ²::Array\n",
"    L::Int64\n",
"end\n",
"\n",
"# Fit `model` in place with `model.L` sweeps of the updates below and return\n",
"# the fitted model. U and V start from `rand`, so results vary between runs\n",
"# unless the global RNG is seeded by the caller.\n",
"function fit(model::VariationalBayesianProbabilisticMatrixFactorizationModel)\n",
"    # Store a copy of the ratings sorted by user id on the model.\n",
"    model.M = model.M[sortperm(model.M[:, 1]), :]\n",
"    n = model.n\n",
"    I = model.I\n",
"    J = model.J\n",
"    K = size(model.M, 1)  # number of observed ratings\n",
"    Ψ = zeros(n, n, J)\n",
"    model.σ² = ones(n)\n",
"    model.ρ² = ones(n)\n",
"    model.U = rand(I, n)\n",
"    model.V = rand(J, n)\n",
"\n",
"    # 1. Initialize Sj and tj for j = 1,...,J:\n",
"    # NOTE(review): S and t are created once and keep accumulating over all\n",
"    # sweeps, and S starts from the identity, so the learned ρ² is never read.\n",
"    # The step labels look like Lim & Teh (2007), where S and t are reset to\n",
"    # diag(1 ./ ρ²) and 0 at the start of every sweep - confirm this is intended.\n",
"    S, t = initialize_S_t(model, n, J)\n",
"\n",
"    for sweep = 1:model.L\n",
"        # 2. Update Q(ui) for i = 1,...,I:\n",
"        model.U, S, t, τ²_tmp, σ²_tmp = update_U(model, Ψ, S, t, I)\n",
"\n",
"        # 3. Update Q(vj) for j = 1,...,J:\n",
"        model.V, Ψ, ρ²_tmp = update_V(model, Ψ, S, t, J)\n",
"\n",
"        # Learning the variances\n",
"        model.σ², model.ρ², model.τ² = update_learning_variances(\n",
"            model, I, J, K, σ²_tmp, ρ²_tmp, τ²_tmp\n",
"        )\n",
"    end\n",
"\n",
"    return model\n",
"end\n",
"\n",
"# Return `(S, t)`: an n x n x J array whose J slices are identity matrices and\n",
"# a J x n matrix of zeros. `model` is accepted but not read.\n",
"function initialize_S_t(model::VariationalBayesianProbabilisticMatrixFactorizationModel, n, J)\n",
"    S = zeros(n, n, J)\n",
"    for j = 1:J, k = 1:n\n",
"        S[k, k, j] = 1.0\n",
"    end\n",
"    t = zeros(J, n)\n",
"    return (S, t)\n",
"end\n",
"\n",
"# Update every user's factors from the current item factors.\n",
"#\n",
"# Mutates `model.U`, `S` and `t` in place and returns\n",
"# `(U, S, t, τ²_tmp, σ²_tmp)`, where the last two are the unnormalised sums\n",
"# that `update_learning_variances` turns into τ² and σ².\n",
"function update_U(model::VariationalBayesianProbabilisticMatrixFactorizationModel, Ψ, S, t, I)\n",
"    n = model.n\n",
"    M = model.M\n",
"    U = model.U\n",
"    V = model.V\n",
"    τ² = model.τ²\n",
"\n",
"    τ²_tmp = 0.0  # Float64 from the start so the accumulator never changes type\n",
"    σ²_tmp = zeros(n)\n",
"    # diag(1 ./ σ²); vec() because σ² is an n x 1 matrix after the first sweep\n",
"    prior_precision = Matrix(Diagonal(vec(1 ./ model.σ²)))\n",
"    user_ids = M[:, 1]  # hoisted: the column was re-copied for every user before\n",
"\n",
"    for i = 1:I\n",
"        # (a) Compute Φi and ui:\n",
"        # Keep only the ratings given by user i; ids in M are 0-based, so\n",
"        # shift the item ids to Julia's 1-based indexing.\n",
"        rated = user_ids .== i - 1\n",
"        items = M[rated, 2] .+ 1\n",
"        ratings = M[rated, 3]\n",
"        V_i = V[items, :]\n",
"        v_mean = mean(V_i, dims = 1)\n",
"\n",
"        # NOTE(review): this adds the outer product of the MEAN item vector to\n",
"        # every Ψj; Lim & Teh (2007) sum vj * vj' per item instead - confirm.\n",
"        Φ = inv(prior_precision + sum((Ψ[:, :, items] .+ v_mean' * v_mean) / τ², dims = 3)[:, :, 1])\n",
"        u_i = Φ * ((V_i' * ratings) ./ τ²)\n",
"        U[i, :] = u_i\n",
"        σ²_tmp .+= diag(Φ)\n",
"\n",
"        # (b) Update Sj and tj for j ∈ N(i), and discard Φi:\n",
"        second_moment = Φ + u_i * u_i'  # identical for every item of this user\n",
"        for (rating, j) in zip(ratings, items)\n",
"            v_j = V[j, :]\n",
"            S[:, :, j] = S[:, :, j] + second_moment / τ²\n",
"            t[j, :] = t[j, :] + (rating * u_i) / τ²\n",
"            τ²_tmp = τ²_tmp + rating^2 - 2 * rating * dot(u_i, v_j) +\n",
"                tr(second_moment * (Ψ[:, :, j] + v_j * v_j'))\n",
"        end\n",
"    end\n",
"\n",
"    # NOTE(review): adds the squared column means of U once, not the sum of\n",
"    # squared entries over users as in Lim & Teh (2007) - confirm.\n",
"    # The adjoint turns σ²_tmp into an n x 1 matrix.\n",
"    σ²_tmp = σ²_tmp + (mean(U, dims = 1) .^ 2)'\n",
"\n",
"    return (U, S, t, τ²_tmp, σ²_tmp)\n",
"end\n",
"\n",
"# Update every item's factors from the accumulated S and t.\n",
"#\n",
"# Overwrites the slices of `Ψ` in place and returns `(V, Ψ, ρ²_tmp)`, where\n",
"# `V` is a new J x n matrix and `ρ²_tmp` is the unnormalised sum that\n",
"# `update_learning_variances` turns into ρ².\n",
"function update_V(model::VariationalBayesianProbabilisticMatrixFactorizationModel, Ψ, S, t, J)\n",
"    n = model.n\n",
"    V = zeros(J, n)\n",
"    ρ²_tmp = zeros(n)  # a vector from the start (was the scalar 0)\n",
"    # 3. Update Q(vj) for j = 1,...,J:\n",
"    for j = 1:J\n",
"        Ψ_j = inv(S[:, :, j])\n",
"        Ψ[:, :, j] = Ψ_j\n",
"        V[j, :] = Ψ_j * t[j, :]\n",
"        ρ²_tmp = ρ²_tmp .+ diag(Ψ_j)\n",
"    end\n",
"    # The adjoint turns ρ²_tmp into an n x 1 matrix.\n",
"    ρ²_tmp = ρ²_tmp + (mean(V, dims = 1) .^ 2)'\n",
"\n",
"    return (V, Ψ, ρ²_tmp)\n",
"end\n",
"\n",
"# Scale the accumulated sums into variances: σ² by 1/(I - 1), ρ² by 1/(J - 1)\n",
"# and τ² by 1/(K - 1). Returns `(σ², ρ², τ²)`. `model` is accepted but not read.\n",
"function update_learning_variances(model::VariationalBayesianProbabilisticMatrixFactorizationModel, I, J, K, σ²_tmp, ρ²_tmp, τ²_tmp)\n",
"    σ² = (1 / (I - 1)) * σ²_tmp\n",
"    ρ² = (1 / (J - 1)) * ρ²_tmp\n",
"    τ² = (1 / (K - 1)) * τ²_tmp\n",
"    return (σ², ρ², τ²)\n",
"end\n",
"\n",
"# Predict a rating for every row of `new`, whose first two columns hold the\n",
"# 0-based user and item ids. Returns a Float64 vector with one entry per row.\n",
"function predict(model::VariationalBayesianProbabilisticMatrixFactorizationModel, new)\n",
"    R_p = model.U * model.V'  # dense I x J matrix of all predicted ratings\n",
"    return Float64[R_p[new[i, 1] + 1, new[i, 2] + 1] for i in 1:size(new, 1)]\n",
"end\n",
"\n",
"end"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# experiment 1"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"6"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Toy ratings: one row per observation as (user id, item id, rating), ids 0-based\n",
"r = [0 0 7;\n",
" 0 1 6;\n",
" 0 2 7;\n",
" 0 3 4;\n",
" 0 4 5;\n",
" 0 5 4;\n",
" 1 0 6;\n",
" 1 1 7;\n",
" 1 3 4;\n",
" 1 4 3;\n",
" 1 5 4;\n",
" 2 1 3;\n",
" 2 2 3;\n",
" 2 3 1;\n",
" 2 4 1;\n",
" 3 0 1;\n",
" 3 1 2;\n",
" 3 2 2;\n",
" 3 3 3;\n",
" 3 4 3;\n",
" 3 5 4;\n",
" 4 0 1;\n",
" 4 2 1;\n",
" 4 3 2;\n",
" 4 4 3;\n",
" 4 5 3]\n",
"\n",
"# Number of users and items. The model indexes its factor rows by id + 1, so\n",
"# the largest id + 1 is needed; counting distinct ids is too small when ids have gaps.\n",
"n_user = maximum(r[:, 1]) + 1\n",
"n_item = maximum(r[:, 2]) + 1"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"main (generic function with 1 method)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit a 3-factor model to the global ratings `r` with 10 sweeps and return the\n",
"# root-mean-square error of its predictions on those same ratings.\n",
"function main()\n",
"    vb = VariationalBayesianProbabilisticMatrixFactorization\n",
"    model = vb.VariationalBayesianProbabilisticMatrixFactorizationModel(\n",
"        r, n_user, n_item, 3, [], [], 1, [], [], 10\n",
"    )\n",
"    vb.fit(model)\n",
"    residuals = r[:, 3] - vb.predict(model, r)\n",
"    return sqrt(mean(residuals .^ 2))\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0.006970 seconds (40.55 k allocations: 2.130 MiB)\n"
]
},
{
"data": {
"text/plain": [
"0.5478565743277313"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#time \n",
"@time main()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"26-element Array{Float64,1}:\n",
" 6.692476057466172 \n",
" 6.1856309538003025\n",
" 6.6023995851109225\n",
" 3.843760930267997 \n",
" 4.368901630969246 \n",
" 3.8651527589913917\n",
" 5.379784014391833 \n",
" 6.080367423692413 \n",
" 3.8000648534494146\n",
" 3.570123259237677 \n",
" 4.132226272977147 \n",
" 2.432525435655192 \n",
" 2.2979338001075487\n",
" 1.385521188507398 \n",
" 1.3317745378407813\n",
" 1.4525667678497998\n",
" 2.3190717457234187\n",
" 1.82068120537136 \n",
" 2.3566533381545547\n",
" 2.4921900542852646\n",
" 3.168846326743875 \n",
" 1.1256917357867615\n",
" 1.288459642477627 \n",
" 1.8376837156781873\n",
" 2.0115436500847883\n",
" 2.458109288645016 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit a 10-factor model with a single sweep and show its predictions for `r`\n",
"VBPMF = VariationalBayesianProbabilisticMatrixFactorization.VariationalBayesianProbabilisticMatrixFactorizationModel(\n",
"    r, n_user, n_item, 10, [], [], 1, [], [], 1\n",
")\n",
"VariationalBayesianProbabilisticMatrixFactorization.fit(VBPMF)\n",
"VariationalBayesianProbabilisticMatrixFactorization.predict(VBPMF, r)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# experiment 2: MovieLens"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"using DataFrames\n",
"using CSV"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"1682"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the ratings file (tab-delimited, no header row)\n",
"r = CSV.read(\"ml-100k/u.data\", header = false, delim = '\\t')\n",
"\n",
"# Convert the first three columns (user id, item id, rating) to an integer array\n",
"r = convert(Array{Int64}, r[:, 1:3])\n",
"\n",
"# Offset the user and item ids so that the smallest id is 0\n",
"r[:, 1:2] .-= 1\n",
"\n",
"# Number of users and items. The model indexes its factor rows by id + 1, so\n",
"# the largest id + 1 is needed; counting distinct ids is too small when ids have gaps.\n",
"n_user = maximum(r[:, 1]) + 1\n",
"n_item = maximum(r[:, 2]) + 1"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"main (generic function with 1 method)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit a 10-factor model to the global ratings `r` with 10 sweeps and return the\n",
"# root-mean-square error of its predictions on those same ratings.\n",
"function main()\n",
"    vb = VariationalBayesianProbabilisticMatrixFactorization\n",
"    model = vb.VariationalBayesianProbabilisticMatrixFactorizationModel(\n",
"        r, n_user, n_item, 10, [], [], 1, [], [], 10\n",
"    )\n",
"    vb.fit(model)\n",
"    residuals = r[:, 3] - vb.predict(model, r)\n",
"    return sqrt(mean(residuals .^ 2))\n",
"end"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 26.401632 seconds (148.09 M allocations: 23.299 GiB, 5.14% gc time)\n"
]
},
{
"data": {
"text/plain": [
"0.8555094690414169"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#time\n",
"@time main()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"100000-element Array{Float64,1}:\n",
" 3.9933868658763623\n",
" 4.1652958490420815\n",
" 1.7774480639911185\n",
" 3.3804985286262745\n",
" 3.6162312650676114\n",
" 4.4606586454335115\n",
" 3.89875886270602 \n",
" 3.8400447667764297\n",
" 3.029830799367223 \n",
" 3.8178940781622908\n",
" 3.297606482041059 \n",
" 3.6495722465942797\n",
" 4.445461973887172 \n",
" ⋮ \n",
" 4.102798609570239 \n",
" 3.321997704823345 \n",
" 3.6432358520785573\n",
" 3.54090906622547 \n",
" 3.7946662281163213\n",
" 3.588008394325686 \n",
" 2.463932613223278 \n",
" 2.953927707501584 \n",
" 4.064679532183598 \n",
" 1.5777487154139653\n",
" 2.345688928963001 \n",
" 4.17260246294854 "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit a 10-factor model with 10 sweeps and show its predictions for `r`\n",
"VBPMF = VariationalBayesianProbabilisticMatrixFactorization.VariationalBayesianProbabilisticMatrixFactorizationModel(\n",
"    r, n_user, n_item, 10, [], [], 1, [], [], 10\n",
")\n",
"VariationalBayesianProbabilisticMatrixFactorization.fit(VBPMF)\n",
"VariationalBayesianProbabilisticMatrixFactorization.predict(VBPMF, r)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"20001×3 Array{Int64,2}:\n",
" 276 1007 3\n",
" 862 1430 4\n",
" 760 1286 1\n",
" 862 321 1\n",
" 827 693 2\n",
" 888 522 4\n",
" 847 497 5\n",
" 215 150 3\n",
" 879 1164 2\n",
" 757 142 5\n",
" 495 195 3\n",
" 757 216 2\n",
" 479 55 4\n",
" ⋮ \n",
" 420 497 4\n",
" 494 1090 4\n",
" 805 420 4\n",
" 675 537 4\n",
" 720 261 3\n",
" 912 208 2\n",
" 377 77 3\n",
" 879 475 3\n",
" 715 203 5\n",
" 275 1089 1\n",
" 12 224 2\n",
" 11 202 3"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Split the ratings into a training set (first 80% of the rows) and a test set\n",
"# (the remaining rows)\n",
"N = size(r, 1)\n",
"# floor() so that row counts where 0.8 * N is not an integer do not throw\n",
"train_size = floor(Int, N * 0.8)\n",
"train_df = r[1:train_size, :]\n",
"# Start one row after the training set so that no rating is in both sets\n",
"test_df = r[(train_size + 1):N, :]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9579913656751956"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit a 10-factor model on the training rows with 10 sweeps, then show the\n",
"# root-mean-square error on the test rows\n",
"VBPMF = VariationalBayesianProbabilisticMatrixFactorization.VariationalBayesianProbabilisticMatrixFactorizationModel(\n",
"    train_df, n_user, n_item, 10, [], [], 1, [], [], 10\n",
")\n",
"VariationalBayesianProbabilisticMatrixFactorization.fit(VBPMF)\n",
"sqrt(mean(\n",
"    (test_df[:, 3] - VariationalBayesianProbabilisticMatrixFactorization.predict(VBPMF, test_df)) .^ 2\n",
"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 1.0.0",
"language": "julia",
"name": "julia-1.0"
},
"language_info": {
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment