Skip to content

Instantly share code, notes, and snippets.

@jinliangwei
Last active April 14, 2018 03:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jinliangwei/74d3871bebc1251b651cc6fd511fed08 to your computer and use it in GitHub Desktop.
Save jinliangwei/74d3871bebc1251b651cc6fd511fed08 to your computer and use it in GitHub Desktop.
# Path of the ratings file that load_data reads
const data_path = "ratings.csv"
# Number of rows of each factor matrix, i.e. the length of every per-id factor column
const K = 100
# Number of timed sgd_batch passes run at the end of the script
const num_iterations = 2
# Numerator of the per-coordinate step size computed inside sgd_element
const alpha = 0.1
"""
    parse_line(line::AbstractString)

Split `line` on commas and return the three fields as an
`(Int64, Int64, Float32)` tuple.

# Throws
- `ArgumentError` if `line` does not contain exactly three comma-separated
  fields, or if a field cannot be parsed as the expected number type.
"""
function parse_line(line::AbstractString)
    tokens = split(line, ',')
    # Explicit check instead of @assert: assertions are a debugging aid and
    # may be disabled, so they must not guard against malformed input.
    if length(tokens) != 3
        throw(ArgumentError(
            "expected 3 comma-separated fields, got $(length(tokens)) in line $(repr(line))"))
    end
    # parse accepts the SubStrings produced by split directly; no String copy needed
    return (parse(Int64, tokens[1]),
            parse(Int64, tokens[2]),
            parse(Float32, tokens[3]))
end
"""
    load_data(path::AbstractString)

Read the file at `path` line by line, convert every line with `parse_line`,
and return the results as a `Vector{Tuple{Int, Int, Float32}}` in file order.

Any error raised by `parse_line` for a malformed line propagates to the caller.
"""
function load_data(path::AbstractString)
    # Typed empty-vector literal: valid on every Julia version, whereas the
    # integer-length constructor `Array{T}(0)` was removed in Julia 1.0.
    ratings = Tuple{Int, Int, Float32}[]
    # The do-block form closes the file even if parsing throws
    open(path, "r") do dataf
        for line in eachline(dataf)
            push!(ratings, parse_line(line))
        end
    end
    return ratings
end
"""
    get_dimension(ratings::Array{Tuple{Int, Int, Float32}})

Return `(max_first + 1, max_second + 1)`, where `max_first` and `max_second`
are the largest first and second tuple fields found in `ratings`.

Both maxima start at 0, so an empty array (or one holding only negative
ids) yields `(1, 1)`.
"""
function get_dimension(ratings::Array{Tuple{Int, Int, Float32}})
    largest_first = 0
    largest_second = 0
    for entry in ratings
        largest_first = max(largest_first, entry[1])
        largest_second = max(largest_second, entry[2])
    end
    return (largest_first + 1, largest_second + 1)
end
# Script setup: read the rating triples from `data_path`
println("serial sgd mf starts here!")
ratings = load_data(data_path)
println("load data done!")
# Length-K scratch vectors passed down to sgd_element. The gradient and
# *_lr_old vectors are overwritten there; W_lr and H_lr are passed along but
# the visible sgd_element never reads or writes them.
W_grad_vec = zeros(Float32, K)
H_grad_vec = zeros(Float32, K)
W_lr = zeros(Float32, K)
H_lr = zeros(Float32, K)
W_lr_old = zeros(Float32, K)
H_lr_old = zeros(Float32, K)
# get_dimension returns (largest first id + 1, largest second id + 1)
dim_x, dim_y = get_dimension(ratings)
println((dim_x, dim_y))
# K-row factor matrices with one column per id, drawn from randn and scaled by 1/10
W_mat = randn(Float32, K, dim_x) ./ 10
H_mat = randn(Float32, K, dim_y) ./ 10
# Same-shaped matrices of ones; sgd_element raises their columns to the power
# 0.5 and divides alpha by the result
W_z_mat = ones(Float32, K, dim_x)
H_z_mat = ones(Float32, K, dim_y)
"""
    sgd_element(rating, alpha, W, H, W_z, H_z,
                W_grad, H_grad, W_lr, H_lr, W_lr_old, H_lr_old)

Process one `rating = (first_id, second_id, value)` triple.

The ids are shifted by one to select a column of `W` and a column of `H`.
With `diff = value - dot(W_col, H_col)`, the function overwrites in place:

- `W_grad` with `(-2 * diff) .* H_col` and `H_grad` with `(-2 * diff) .* W_col`
- `W_lr_old` with `alpha ./ (W_z_col .^ 0.5)` and `H_lr_old` likewise from `H_z`

NOTE(review): `W_lr` and `H_lr` are accepted but never used, and the two
final statements copy each selected column back onto itself, so the values
stored in `W` and `H` do not change. Confirm whether the parameter update
was left out on purpose (e.g. for benchmarking).
"""
function sgd_element(rating, alpha, W, H, W_z, H_z,
                     W_grad, H_grad, W_lr, H_lr, W_lr_old, H_lr_old)
    # Shift the stored ids by one to obtain 1-based column numbers
    w_col = rating[1] + 1
    h_col = rating[2] + 1

    w_factor = view(W, :, w_col)
    h_factor = view(H, :, h_col)

    # Scalar factor shared by both gradients
    residual = rating[3] - dot(w_factor, h_factor)
    scale = -2 * residual
    W_grad .= scale .* h_factor
    H_grad .= scale .* w_factor

    W_lr_old .= alpha ./ (view(W_z, :, w_col) .^ 0.5)
    H_lr_old .= alpha ./ (view(H_z, :, h_col) .^ 0.5)

    # Write-back of the column views onto the columns they alias
    W[:, w_col] .= w_factor
    H[:, h_col] .= h_factor
end
"""
    sgd_batch(ratings, alpha, num_iterations, W, H, W_z, H_z,
              W_grad, H_grad, W_lr, H_lr, W_lr_old, H_lr_old)

Make one pass over `ratings`, calling `sgd_element` on every entry in
iteration order with the same matrices and scratch vectors.

`num_iterations` is accepted but not used here. Returns `nothing`.
"""
function sgd_batch(ratings, alpha, num_iterations,
                   W, H, W_z, H_z,
                   W_grad, H_grad, W_lr, H_lr, W_lr_old, H_lr_old)
    for entry in ratings
        sgd_element(entry, alpha, W, H, W_z, H_z,
                    W_grad, H_grad, W_lr, H_lr, W_lr_old, H_lr_old)
    end
    return nothing
end
# Run the configured number of passes over the ratings; @time reports each
# pass separately (the first report also includes compilation).
for iteration in 1:num_iterations
    @time sgd_batch(ratings, alpha, num_iterations,
                    W_mat, H_mat, W_z_mat, H_z_mat,
                    W_grad_vec, H_grad_vec,
                    W_lr, H_lr, W_lr_old, H_lr_old)
end
# Path of the ratings file that load_data reads
const data_path = "ratings.csv"
# Number of rows of each factor matrix, i.e. the length of every per-id factor column
const K = 100
# Number of timed sgd_batch passes run at the end of the script
const num_iterations = 2
# Numerator of the per-coordinate step size computed inside sgd_element
const alpha = 0.1
"""
    parse_line(line::AbstractString)

Split `line` on commas and return the three fields as an
`(Int64, Int64, Float64)` tuple.

The rating is parsed directly as `Float64`, the element type this variant of
the script stores, so no precision is lost by going through `Float32` first.

# Throws
- `ArgumentError` if `line` does not contain exactly three comma-separated
  fields, or if a field cannot be parsed as the expected number type.
"""
function parse_line(line::AbstractString)
    tokens = split(line, ',')
    # Explicit check instead of @assert: assertions are a debugging aid and
    # may be disabled, so they must not guard against malformed input.
    if length(tokens) != 3
        throw(ArgumentError(
            "expected 3 comma-separated fields, got $(length(tokens)) in line $(repr(line))"))
    end
    # parse accepts the SubStrings produced by split directly; no String copy needed
    return (parse(Int64, tokens[1]),
            parse(Int64, tokens[2]),
            parse(Float64, tokens[3]))
end
"""
    load_data(path::AbstractString)

Read the file at `path` line by line, convert every line with `parse_line`,
and return the results as a `Vector{Tuple{Int, Int, Float64}}` in file order.

Any error raised by `parse_line` for a malformed line propagates to the caller.
"""
function load_data(path::AbstractString)
    # Typed empty-vector literal: valid on every Julia version, whereas the
    # integer-length constructor `Array{T}(0)` was removed in Julia 1.0.
    ratings = Tuple{Int, Int, Float64}[]
    # The do-block form closes the file even if parsing throws
    open(path, "r") do dataf
        for line in eachline(dataf)
            # push! converts the parsed tuple to the vector's element type
            push!(ratings, parse_line(line))
        end
    end
    return ratings
end
"""
    get_dimension(ratings::Array{Tuple{Int, Int, Float64}})

Return `(max_first + 1, max_second + 1)`, where `max_first` and `max_second`
are the largest first and second tuple fields found in `ratings`.

Both maxima start at 0, so an empty array (or one holding only negative
ids) yields `(1, 1)`.
"""
function get_dimension(ratings::Array{Tuple{Int, Int, Float64}})
    largest_first = 0
    largest_second = 0
    for entry in ratings
        largest_first = max(largest_first, entry[1])
        largest_second = max(largest_second, entry[2])
    end
    return (largest_first + 1, largest_second + 1)
end
# Script setup: read the rating triples from `data_path`
println("serial sgd mf starts here!")
ratings = load_data(data_path)
println("load data done!")
# Length-K scratch vectors passed down to sgd_element. The gradient and
# *_lr_old vectors are overwritten there; W_lr and H_lr are passed along but
# the visible sgd_element never reads or writes them.
W_grad_vec = zeros(Float64, K)
H_grad_vec = zeros(Float64, K)
W_lr = zeros(Float64, K)
H_lr = zeros(Float64, K)
W_lr_old = zeros(Float64, K)
H_lr_old = zeros(Float64, K)
# get_dimension returns (largest first id + 1, largest second id + 1)
dim_x, dim_y = get_dimension(ratings)
println((dim_x, dim_y))
# K-row factor matrices with one column per id, drawn from randn and scaled by 1/10
W_mat = randn(Float64, K, dim_x) ./ 10
H_mat = randn(Float64, K, dim_y) ./ 10
# Same-shaped matrices of ones; sgd_element raises their columns to the power
# 0.5 and divides alpha by the result
W_z_mat = ones(Float64, K, dim_x)
H_z_mat = ones(Float64, K, dim_y)
"""
    sgd_element(rating, alpha, W, H, W_z, H_z,
                W_grad, H_grad, W_lr, H_lr, W_lr_old, H_lr_old)

Process one `rating = (first_id, second_id, value)` triple.

The ids are shifted by one to select a column of `W` and a column of `H`.
With `diff = value - dot(W_col, H_col)`, the function overwrites in place:

- `W_grad` with `(-2 * diff) .* H_col` and `H_grad` with `(-2 * diff) .* W_col`
- `W_lr_old` with `alpha ./ (W_z_col .^ 0.5)` and `H_lr_old` likewise from `H_z`

NOTE(review): `W_lr` and `H_lr` are accepted but never used, and the two
final statements copy each selected column back onto itself, so the values
stored in `W` and `H` do not change. Confirm whether the parameter update
was left out on purpose (e.g. for benchmarking).
"""
function sgd_element(rating, alpha, W, H, W_z, H_z,
                     W_grad, H_grad, W_lr, H_lr, W_lr_old, H_lr_old)
    # Shift the stored ids by one to obtain 1-based column numbers
    w_col = rating[1] + 1
    h_col = rating[2] + 1

    w_factor = view(W, :, w_col)
    h_factor = view(H, :, h_col)

    # Scalar factor shared by both gradients
    residual = rating[3] - dot(w_factor, h_factor)
    scale = -2 * residual
    W_grad .= scale .* h_factor
    H_grad .= scale .* w_factor

    W_lr_old .= alpha ./ (view(W_z, :, w_col) .^ 0.5)
    H_lr_old .= alpha ./ (view(H_z, :, h_col) .^ 0.5)

    # Write-back of the column views onto the columns they alias
    W[:, w_col] .= w_factor
    H[:, h_col] .= h_factor
end
"""
    sgd_batch(ratings, alpha, num_iterations, W, H, W_z, H_z,
              W_grad, H_grad, W_lr, H_lr, W_lr_old, H_lr_old)

Make one pass over `ratings`, calling `sgd_element` on every entry in
iteration order with the same matrices and scratch vectors.

`num_iterations` is accepted but not used here. Returns `nothing`.
"""
function sgd_batch(ratings, alpha, num_iterations,
                   W, H, W_z, H_z,
                   W_grad, H_grad, W_lr, H_lr, W_lr_old, H_lr_old)
    for entry in ratings
        sgd_element(entry, alpha, W, H, W_z, H_z,
                    W_grad, H_grad, W_lr, H_lr, W_lr_old, H_lr_old)
    end
    return nothing
end
# Run the configured number of passes over the ratings; @time reports each
# pass separately (the first report also includes compilation).
for iteration in 1:num_iterations
    @time sgd_batch(ratings, alpha, num_iterations,
                    W_mat, H_mat, W_z_mat, H_z_mat,
                    W_grad_vec, H_grad_vec,
                    W_lr, H_lr, W_lr_old, H_lr_old)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment