Created
September 11, 2018 01:19
-
-
Save RottenFruits/8135b68eaaf2c50c60fe32c0af3d8253 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# package" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"using Statistics" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# module" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Main.VariationalBayesianProbabilisticMatrixFactorization" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"module VariationalBayesianProbabilisticMatrixFactorization\n", | |
"\n", | |
"using LinearAlgebra\n", | |
"using Statistics\n", | |
"\n", | |
"mutable struct VariationalBayesianProbabilisticMatrixFactorizationModel\n", | |
" M::Array\n", | |
" I::Int64\n", | |
" J::Int64\n", | |
" n::Int64\n", | |
" U::Array\n", | |
" V::Array\n", | |
" τ²::Float64\n", | |
" σ²::Array\n", | |
" ρ²::Array\n", | |
" L::Int64\n", | |
"end\n", | |
"\n", | |
"function fit(model::VariationalBayesianProbabilisticMatrixFactorizationModel)\n", | |
" M = model.M = model.M[sortperm(model.M[:, 1]), :]\n", | |
" n = model.n\n", | |
" I = model.I\n", | |
" J = model.J\n", | |
" K = size(M)[1] \n", | |
" Ψ = zeros(n, n, J)\n", | |
" model.σ² = ones(n) \n", | |
" model.ρ² = ones(n) \n", | |
" model.U = rand(I, n)\n", | |
" model.V = rand(J, n)\n", | |
" \n", | |
" #1.nitialize Sj and tj for j = 1,...,J:\n", | |
" S, t = initialize_S_t(model, n, J)\n", | |
" \n", | |
" for l = 1:model.L \n", | |
" #2. Update Q(ui) for i = 1,...,I:\n", | |
" model.U, S, t, τ²_tmp, σ²_tmp = update_U(model, Ψ, S, t, I)\n", | |
" \n", | |
" #3. Update Q(vj) for j = 1,...,J:\n", | |
" model.V, Ψ, ρ²_tmp = update_V(model, Ψ, S, t, J)\n", | |
" \n", | |
" #update Learning the Variances\n", | |
" model.σ², model.ρ², model.τ² = update_learning_variances(model, I, J, K, σ²_tmp, ρ²_tmp, τ²_tmp)\n", | |
" end \n", | |
"end\n", | |
"\n", | |
"function initialize_S_t(model::VariationalBayesianProbabilisticMatrixFactorizationModel, n, J)\n", | |
" S = zeros(n, n, J)\n", | |
" for j = 1:J\n", | |
" S[:, :, j] = one(zeros(n, n))\n", | |
" end\n", | |
" t = zeros(J, n)\n", | |
" return(S, t)\n", | |
"end\n", | |
"\n", | |
"function update_U(model::VariationalBayesianProbabilisticMatrixFactorizationModel, Ψ, S, t, I)\n", | |
" n = model.n\n", | |
" M = model.M\n", | |
" U = model.U\n", | |
" V = model.V\n", | |
" τ² = model.τ²\n", | |
" σ² = model.σ²\n", | |
" \n", | |
" τ²_tmp = 0\n", | |
" σ²_tmp = zeros(n)\n", | |
" σ²_matrix = one(zeros(n, n)) .* (1 ./ σ²)\n", | |
" \n", | |
" for i = 1:I\n", | |
" #(a) Compute Φi and ui:\n", | |
" N_i = M[M[:, 1] .== i - 1, :] #ユーザーごとに評価があるアイテムだけ取り出す\n", | |
" N_i[:, 1:2] .+= 1 #indexを1始まりにする\n", | |
" \n", | |
" Φ = inv(σ²_matrix + sum((Ψ[:, :, N_i[:, 2]] .+ mean(V[N_i[:, 2], :], dims = 1)' * mean(V[N_i[:, 2], :], dims = 1)) / τ², dims = 3)[:, :, 1])\n", | |
" u = Φ * sum((N_i[:, 3]' * V[N_i[:, 2], :]) / τ², dims = 1)'\n", | |
" U[i, :] = u \n", | |
" σ²_tmp = σ²_tmp + diag(Φ)\n", | |
"\n", | |
" #(b) Update Sj and tj for j ∈ N(i), and discard Φi:\n", | |
" idx = 1\n", | |
" for j = N_i[:, 2]\n", | |
" S[:, :, j] = S[:, :, j] + (Φ + U[i, :] * U[i, :]') / τ²\n", | |
" t[j, :] = t[j, :] + ((N_i[idx, 3] * U[i, :]') / τ²)'\n", | |
" τ²_tmp = τ²_tmp + (N_i[idx, 3] ^ 2) - (2 * N_i[idx, 3] * (U[i, :]' * V[j, :])) + tr((Φ + U[i, :] * U[i, :]') * (Ψ[:, :, j] + V[j, :] * V[j, :]'))\n", | |
" idx = idx + 1\n", | |
" end\n", | |
" end\n", | |
"\n", | |
" σ²_tmp = σ²_tmp + (mean(U, dims =1) .^ 2)'\n", | |
" \n", | |
" return(U, S, t, τ²_tmp, σ²_tmp)\n", | |
"end\n", | |
"\n", | |
"function update_V(model::VariationalBayesianProbabilisticMatrixFactorizationModel, Ψ, S, t, J)\n", | |
" V = zeros(J, model.n)\n", | |
" ρ²_tmp = 0\n", | |
" #3. Update Q(vj) for j = 1,...,J:\n", | |
" for j = 1:J\n", | |
" Ψ[:, :, j] = inv(S[:, :, j])\n", | |
" V[j, :] = (Ψ[:, :, j] * t[j, :])'\n", | |
" ρ²_tmp = ρ²_tmp .+ diag(Ψ[:, :, j])\n", | |
" end\n", | |
" ρ²_tmp = ρ²_tmp + (mean(V, dims =1) .^ 2)'\n", | |
" \n", | |
" return(V, Ψ, ρ²_tmp)\n", | |
"end\n", | |
"\n", | |
"function update_learning_variances(model::VariationalBayesianProbabilisticMatrixFactorizationModel, I, J, K, σ²_tmp, ρ²_tmp, τ²_tmp)\n", | |
" σ² = (1/(I - 1)) * σ²_tmp\n", | |
" ρ² = (1/(J- 1)) * ρ²_tmp\n", | |
" τ² = (1/(K - 1)) * τ²_tmp\n", | |
" return(σ², ρ² , τ²)\n", | |
"end\n", | |
"\n", | |
"function predict(model::VariationalBayesianProbabilisticMatrixFactorizationModel, new)\n", | |
" R_p = model.U * model.V'\n", | |
" r = zeros(size(new)[1])\n", | |
" for i in 1:size(new)[1]\n", | |
" r[i] = R_p[new[i, 1] + 1, new[i, 2] + 1]\n", | |
" end\n", | |
" return r\n", | |
"end\n", | |
"\n", | |
"end" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# experiment 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"6" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#data\n", | |
"r = [0 0 7;\n", | |
" 0 1 6;\n", | |
" 0 2 7;\n", | |
" 0 3 4;\n", | |
" 0 4 5;\n", | |
" 0 5 4;\n", | |
" 1 0 6;\n", | |
" 1 1 7;\n", | |
" 1 3 4;\n", | |
" 1 4 3;\n", | |
" 1 5 4;\n", | |
" 2 1 3;\n", | |
" 2 2 3;\n", | |
" 2 3 1;\n", | |
" 2 4 1;\n", | |
" 3 0 1;\n", | |
" 3 1 2;\n", | |
" 3 2 2;\n", | |
" 3 3 3;\n", | |
" 3 4 3;\n", | |
" 3 5 4;\n", | |
" 4 0 1;\n", | |
" 4 2 1;\n", | |
" 4 3 2;\n", | |
" 4 4 3;\n", | |
" 4 5 3]\n", | |
"\n", | |
"#ユニークユーザー、ユニークアイテム\n", | |
"n_user = length(unique(r[:, 1]))\n", | |
"n_item = length(unique(r[:, 2]))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"main (generic function with 1 method)" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"function main()\n", | |
" VBPMF = VariationalBayesianProbabilisticMatrixFactorization.VariationalBayesianProbabilisticMatrixFactorizationModel(r, n_user, n_item, 3, [], [], 1, [], [], 10)\n", | |
" VariationalBayesianProbabilisticMatrixFactorization.fit(VBPMF)\n", | |
" sqrt(mean((r[:, 3] - VariationalBayesianProbabilisticMatrixFactorization.predict(VBPMF, r)) .^ 2))\n", | |
"end" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" 0.006970 seconds (40.55 k allocations: 2.130 MiB)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"0.5478565743277313" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#time \n", | |
"@time main()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"26-element Array{Float64,1}:\n", | |
" 6.692476057466172 \n", | |
" 6.1856309538003025\n", | |
" 6.6023995851109225\n", | |
" 3.843760930267997 \n", | |
" 4.368901630969246 \n", | |
" 3.8651527589913917\n", | |
" 5.379784014391833 \n", | |
" 6.080367423692413 \n", | |
" 3.8000648534494146\n", | |
" 3.570123259237677 \n", | |
" 4.132226272977147 \n", | |
" 2.432525435655192 \n", | |
" 2.2979338001075487\n", | |
" 1.385521188507398 \n", | |
" 1.3317745378407813\n", | |
" 1.4525667678497998\n", | |
" 2.3190717457234187\n", | |
" 1.82068120537136 \n", | |
" 2.3566533381545547\n", | |
" 2.4921900542852646\n", | |
" 3.168846326743875 \n", | |
" 1.1256917357867615\n", | |
" 1.288459642477627 \n", | |
" 1.8376837156781873\n", | |
" 2.0115436500847883\n", | |
" 2.458109288645016 " | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#predict\n", | |
"VBPMF = VariationalBayesianProbabilisticMatrixFactorization.VariationalBayesianProbabilisticMatrixFactorizationModel(r, n_user, n_item, 10, [], [], 1, [], [], 1)\n", | |
"VariationalBayesianProbabilisticMatrixFactorization.fit(VBPMF)\n", | |
"VariationalBayesianProbabilisticMatrixFactorization.predict(VBPMF, r)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# # experiment 2: movie lens" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"using DataFrames\n", | |
"using CSV" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1682" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#データ読み込み\n", | |
"r = CSV.read(\"ml-100k/u.data\", header = false, delim = '\\t')\n", | |
"\n", | |
"#配列化\n", | |
"r = convert(Array{Int64}, r[:, 1:3])\n", | |
"\n", | |
"#オフセット idの最小値を0にする\n", | |
"r[:, 1, :] = r[:, 1, :] .- 1\n", | |
"r[:, 2, :] = r[:, 2, :] .- 1\n", | |
"\n", | |
"#ユニークユーザー、ユニークアイテム\n", | |
"n_user = length(unique(r[:, 1]))\n", | |
"n_item = length(unique(r[:, 2]))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"main (generic function with 1 method)" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"function main()\n", | |
" VBPMF = VariationalBayesianProbabilisticMatrixFactorization.VariationalBayesianProbabilisticMatrixFactorizationModel(r, n_user, n_item,10, [], [], 1, [], [], 10)\n", | |
" VariationalBayesianProbabilisticMatrixFactorization.fit(VBPMF)\n", | |
" sqrt(mean((r[:, 3] - VariationalBayesianProbabilisticMatrixFactorization.predict(VBPMF, r)) .^ 2))\n", | |
"end" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" 26.401632 seconds (148.09 M allocations: 23.299 GiB, 5.14% gc time)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"0.8555094690414169" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#time\n", | |
"@time main()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"100000-element Array{Float64,1}:\n", | |
" 3.9933868658763623\n", | |
" 4.1652958490420815\n", | |
" 1.7774480639911185\n", | |
" 3.3804985286262745\n", | |
" 3.6162312650676114\n", | |
" 4.4606586454335115\n", | |
" 3.89875886270602 \n", | |
" 3.8400447667764297\n", | |
" 3.029830799367223 \n", | |
" 3.8178940781622908\n", | |
" 3.297606482041059 \n", | |
" 3.6495722465942797\n", | |
" 4.445461973887172 \n", | |
" ⋮ \n", | |
" 4.102798609570239 \n", | |
" 3.321997704823345 \n", | |
" 3.6432358520785573\n", | |
" 3.54090906622547 \n", | |
" 3.7946662281163213\n", | |
" 3.588008394325686 \n", | |
" 2.463932613223278 \n", | |
" 2.953927707501584 \n", | |
" 4.064679532183598 \n", | |
" 1.5777487154139653\n", | |
" 2.345688928963001 \n", | |
" 4.17260246294854 " | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#predict\n", | |
"VBPMF = VariationalBayesianProbabilisticMatrixFactorization.VariationalBayesianProbabilisticMatrixFactorizationModel(r, n_user, n_item, 10, [], [], 1, [], [], 10)\n", | |
"VariationalBayesianProbabilisticMatrixFactorization.fit(VBPMF)\n", | |
"VariationalBayesianProbabilisticMatrixFactorization.predict(VBPMF, r)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"20001×3 Array{Int64,2}:\n", | |
" 276 1007 3\n", | |
" 862 1430 4\n", | |
" 760 1286 1\n", | |
" 862 321 1\n", | |
" 827 693 2\n", | |
" 888 522 4\n", | |
" 847 497 5\n", | |
" 215 150 3\n", | |
" 879 1164 2\n", | |
" 757 142 5\n", | |
" 495 195 3\n", | |
" 757 216 2\n", | |
" 479 55 4\n", | |
" ⋮ \n", | |
" 420 497 4\n", | |
" 494 1090 4\n", | |
" 805 420 4\n", | |
" 675 537 4\n", | |
" 720 261 3\n", | |
" 912 208 2\n", | |
" 377 77 3\n", | |
" 879 475 3\n", | |
" 715 203 5\n", | |
" 275 1089 1\n", | |
" 12 224 2\n", | |
" 11 202 3" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#学習データとテストデータ分割\n", | |
"N = size(r)[1]\n", | |
"train_size = Int64(N * 0.8)\n", | |
"train_df = r[1:train_size, :]\n", | |
"test_df = r[train_size:N, :]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.9579913656751956" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#predict\n", | |
"VBPMF = VariationalBayesianProbabilisticMatrixFactorization.VariationalBayesianProbabilisticMatrixFactorizationModel(train_df, n_user, n_item,10, [], [], 1, [], [], 10)\n", | |
"VariationalBayesianProbabilisticMatrixFactorization.fit(VBPMF)\n", | |
"sqrt(mean((test_df[:, 3] - VariationalBayesianProbabilisticMatrixFactorization.predict(VBPMF, test_df)) .^ 2))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Julia 1.0.0", | |
"language": "julia", | |
"name": "julia-1.0" | |
}, | |
"language_info": { | |
"file_extension": ".jl", | |
"mimetype": "application/julia", | |
"name": "julia", | |
"version": "1.0.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment