# jinliangwei/my_array_test.jl

## my_array_test.jl
# Length of each factor column: first dimension of W and H and length of the
# gradient buffers allocated in `train`.
const K = 100
# Number of passes over the ratings; passed to `train` at the bottom of the file.
const num_iterations = 1
# Factor applied to the gradient in every update performed by `train`.
const step_size = 0.01
# Upper bounds of the random first / second indices drawn by `parse_line`.
const dim_x = 5000
const dim_y = 5000
# Number of synthetic rating tuples generated by `load_data`.
const num_elements = 100

"""
    DenseArrayAccessor{T, N}(key_begin, values, dims)

`N`-dimensional array whose elements are stored in the flat vector `values`.

# Fields
- `key_begin`: offset subtracted from a linear index before `values` is indexed.
- `values`: flat element storage.
- `dims`: array dimensions, kept as an `N`-tuple.

# Throws
- `DimensionMismatch` if `dims` does not contain exactly `N` entries.
"""
struct DenseArrayAccessor{T, N} <: AbstractArray{T, N}
    key_begin::Int64
    values::Vector{T}
    dims::NTuple{N, Int64}

    function DenseArrayAccessor{T, N}(key_begin::Int64,
                                      values::Vector{T},
                                      dims::Vector{Int64}) where {T, N}
        # Without this check a wrong-length `dims` only fails inside `new`
        # with an opaque tuple-conversion error.
        length(dims) == N || throw(DimensionMismatch(
            "DenseArrayAccessor{$T, $N} needs $N dimensions, got $(length(dims))"))
        return new(key_begin, values, tuple(dims...))
    end
end

# Elements live in one flat vector, so linear indexing is the native style.
# (`Type{<:X}` replaces the deprecated `f{T<:X}(::Type{T})` method syntax.)
Base.IndexStyle(::Type{<:DenseArrayAccessor}) = IndexLinear()

# Dimensions are exactly the tuple recorded when the accessor was built.
Base.size(accessor::DenseArrayAccessor) = accessor.dims

# Read the element at linear index `i`. `key_begin` is subtracted first to get
# the position inside the flat `values` buffer. `values` is a `Vector{T}`, so the
# result already has the element type and needs no return-type assertion.
function Base.getindex(accessor::DenseArrayAccessor, i::Int)
    return accessor.values[i - accessor.key_begin]
end

# Store `v` at linear index `i` (shifted by `key_begin` into the flat `values`
# buffer) and return `v`, as the assignment expression did before.
function Base.setindex!(accessor::DenseArrayAccessor, v, i::Int)
    accessor.values[i - accessor.key_begin] = v
    return v
end

# Allocate an uninitialised plain `Array` with element type `S` and shape `dims`.
# Delegating to `similar` on the backing vector keeps the rank in the type domain
# and avoids the version-specific `Array{T, N}(dims)` constructor.
function Base.similar(accessor::DenseArrayAccessor, ::Type{S}, dims::Dims) where {S}
    return similar(accessor.values, S, dims)
end

# Array wrapper that forwards to an optional backing array ("accessor").
# The accessor slot is empty when the zero-argument constructor is used.
struct DenseArray{T, N} <: AbstractArray{T, N}
    accessor::Nullable{AbstractArray}

    # Wrap an existing array as the backing accessor.
    function DenseArray{T, N}(accessor::AbstractArray) where {T, N}
        return new(Nullable{AbstractArray}(accessor))
    end

    # Build a wrapper with no accessor attached.
    function DenseArray{T, N}() where {T, N}
        return new(Nullable{AbstractArray}())
    end
end

# DenseArray declares linear indexing as its native style.
# (`Type{<:X}` replaces the deprecated `f{T<:X}(::Type{T})` method syntax.)
Base.IndexStyle(::Type{<:DenseArray}) = IndexLinear()

# Size of the wrapped accessor. The accessor may be any `AbstractArray`, so ask
# it through `size` instead of reading a `dims` field that only some array types
# have. Throws if no accessor is attached.
function Base.size(dist_array::DenseArray)
    return size(get(dist_array.accessor))
end

# Scalar indexing: forward to the accessor and convert the element to `T`.
function Base.getindex(dist_array::DenseArray{T}, I::Int...) where {T}
    return convert(T, getindex(get(dist_array.accessor), I...))
end

# Any other indexing (ranges, colons, ...) yields a collection, which must not be
# converted to the element type; forward the accessor's result unchanged.
function Base.getindex(dist_array::DenseArray, I...)
    return getindex(get(dist_array.accessor), I...)
end

# Forward the assignment to the wrapped accessor and return whatever its
# `setindex!` returns. Throws if no accessor is attached.
function Base.setindex!(dist_array::DenseArray, v, I...)
    return setindex!(get(dist_array.accessor), v, I...)
end

# Allocate storage shaped `dims` with element type `S` by delegating to the
# wrapped accessor. `S` is independent of the wrapper's own element type, so the
# method also applies when a different element type is requested.
function Base.similar(dist_array::DenseArray, ::Type{S}, dims::Dims) where {S}
    return similar(get(dist_array.accessor), S, dims)
end

# Produce one synthetic rating tuple: a random first index in 1:dim_x, a random
# second index in 1:dim_y, and the constant value 1.0.
parse_line() = (rand(1:dim_x), rand(1:dim_y), 1.0)

# Build the rating list: `num_elements` tuples, each produced by `parse_line`.
# The typed comprehension fixes the element type even when `num_elements` is 0
# and needs no version-specific empty-array constructor.
function load_data()
    return Tuple{Int64, Int64, Float64}[parse_line() for _ in 1:num_elements]
end

# Return (largest first index + 1, largest second index + 1) over `ratings`.
# Both maxima start from 0, so an empty input yields (1, 1).
function get_dimension(ratings::Array{Tuple{Int64, Int64, Float64}})
    largest_x = 0
    largest_y = 0
    for (x, y, _) in ratings
        largest_x = max(largest_x, x)
        largest_y = max(largest_y, y)
    end
    return largest_x + 1, largest_y + 1
end


# Script start: announce the run, then build the in-memory rating list.
println("serial sgd mf starts here!")
# `ratings` is a global here; it is handed to `train` as an argument below.
ratings = load_data()
println("load data done!")

# Run SGD over `ratings` and print the summed squared error after every pass.
#
# Arguments:
#   ratings        - collection whose elements are indexed as (x, y, value)
#   step_size      - factor applied to the gradient in every update
#   num_iterations - number of passes over `ratings`
#
# W and H hold one length-K column per index and start as randn() / 10.
# Both loops are wrapped in @time, so timing lines are printed as well.
function train(ratings, step_size, num_iterations)
    dim_x, dim_y = get_dimension(ratings)
    println((dim_x, dim_y))

    # Flat buffers with key_begin = 0, wrapped first in the accessor and then in
    # DenseArray. (Using the accessors directly, without the DenseArray wrapper,
    # is the alternative set-up.)
    W_accessor = DenseArrayAccessor{Float64, 2}(0, randn(K * dim_x) ./ 10, [K, dim_x])
    W = DenseArray{Float64, 2}(W_accessor)
    H_accessor = DenseArrayAccessor{Float64, 2}(0, randn(K * dim_y) ./ 10, [K, dim_y])
    H = DenseArray{Float64, 2}(H_accessor)

    # Gradient buffers reused across all updates.
    W_grad = zeros(K)
    H_grad = zeros(K)

    @time for iteration = 1:num_iterations
        @time for rating in ratings
            # Stored indices are shifted by one to select the column.
            x_idx, y_idx, rv = rating[1] + 1, rating[2] + 1, rating[3]
            W_row = view(W, :, x_idx)
            H_row = view(H, :, y_idx)
            residual = rv - dot(W_row, H_row)
            # Both gradients are taken before either column is modified.
            W_grad .= -2 .* residual .* H_row
            H_grad .= -2 .* residual .* W_row
            W[:, x_idx] .= W_row .- step_size .* W_grad
            H[:, y_idx] .= H_row .- step_size .* H_grad
        end

        should_evaluate = iteration % 1 == 0 || iteration == num_iterations
        if should_evaluate
            println("evaluate model")
            err = 0.0
            for rating in ratings
                x_idx, y_idx, rv = rating[1] + 1, rating[2] + 1, rating[3]
                pred = dot(view(W, :, x_idx), view(H, :, y_idx))
                err += (rv - pred) ^ 2
            end
            println("iteration = ", iteration,
                    " err = ", err)
        end
    end
end

# Run the training loop once with the constants defined at the top of the file.
train(ratings, step_size, num_iterations)

# Print the type-inference report for the same call.
@code_warntype train(ratings, step_size, num_iterations)
	# Second, tab-indented copy of the constants already defined near the top of
	# the file; the values are identical.
	# K: first dimension of W and H and length of the gradient buffers in `train`.
	const K = 100
	# Number of passes over the ratings; passed to `train`.
	const num_iterations = 1
	# Factor applied to the gradient in every update performed by `train`.
	const step_size = 0.01
	# Upper bounds of the random first / second indices drawn by `parse_line`.
	const dim_x = 5000
	const dim_y = 5000
	# Number of synthetic rating tuples generated by `load_data`.
	const num_elements = 100

	"""
	    DenseArrayAccessor{T, N}(key_begin, values, dims)

	`N`-dimensional array whose elements are stored in the flat vector `values`.

	# Fields
	- `key_begin`: offset subtracted from a linear index before `values` is indexed.
	- `values`: flat element storage.
	- `dims`: array dimensions, kept as an `N`-tuple.

	# Throws
	- `DimensionMismatch` if `dims` does not contain exactly `N` entries.
	"""
	struct DenseArrayAccessor{T, N} <: AbstractArray{T, N}
	    key_begin::Int64
	    values::Vector{T}
	    dims::NTuple{N, Int64}

	    function DenseArrayAccessor{T, N}(key_begin::Int64,
	                                      values::Vector{T},
	                                      dims::Vector{Int64}) where {T, N}
	        # Without this check a wrong-length `dims` only fails inside `new`
	        # with an opaque tuple-conversion error.
	        length(dims) == N || throw(DimensionMismatch(
	            "DenseArrayAccessor{$T, $N} needs $N dimensions, got $(length(dims))"))
	        return new(key_begin, values, tuple(dims...))
	    end
	end

	# Elements live in one flat vector, so linear indexing is the native style.
	# (`Type{<:X}` replaces the deprecated `f{T<:X}(::Type{T})` method syntax.)
	Base.IndexStyle(::Type{<:DenseArrayAccessor}) = IndexLinear()

	# Dimensions are exactly the tuple recorded when the accessor was built.
	Base.size(accessor::DenseArrayAccessor) = accessor.dims

	# Read the element at linear index `i`. `key_begin` is subtracted first to get
	# the position inside the flat `values` buffer. `values` is a `Vector{T}`, so
	# the result already has the element type and needs no return-type assertion.
	function Base.getindex(accessor::DenseArrayAccessor, i::Int)
	    return accessor.values[i - accessor.key_begin]
	end

	# Store `v` at linear index `i` (shifted by `key_begin` into the flat `values`
	# buffer) and return `v`, as the assignment expression did before.
	function Base.setindex!(accessor::DenseArrayAccessor, v, i::Int)
	    accessor.values[i - accessor.key_begin] = v
	    return v
	end

	# Allocate an uninitialised plain `Array` with element type `S` and shape
	# `dims`. Delegating to `similar` on the backing vector keeps the rank in the
	# type domain and avoids the version-specific `Array{T, N}(dims)` constructor.
	function Base.similar(accessor::DenseArrayAccessor, ::Type{S}, dims::Dims) where {S}
	    return similar(accessor.values, S, dims)
	end

	# Array wrapper that forwards to an optional backing array ("accessor").
	# The accessor slot is empty when the zero-argument constructor is used.
	struct DenseArray{T, N} <: AbstractArray{T, N}
	    accessor::Nullable{AbstractArray}

	    # Wrap an existing array as the backing accessor.
	    function DenseArray{T, N}(accessor::AbstractArray) where {T, N}
	        return new(Nullable{AbstractArray}(accessor))
	    end

	    # Build a wrapper with no accessor attached.
	    function DenseArray{T, N}() where {T, N}
	        return new(Nullable{AbstractArray}())
	    end
	end

	# DenseArray declares linear indexing as its native style.
	# (`Type{<:X}` replaces the deprecated `f{T<:X}(::Type{T})` method syntax.)
	Base.IndexStyle(::Type{<:DenseArray}) = IndexLinear()

	# Size of the wrapped accessor. The accessor may be any `AbstractArray`, so
	# ask it through `size` instead of reading a `dims` field that only some array
	# types have. Throws if no accessor is attached.
	function Base.size(dist_array::DenseArray)
	    return size(get(dist_array.accessor))
	end

	# Scalar indexing: forward to the accessor and convert the element to `T`.
	function Base.getindex(dist_array::DenseArray{T}, I::Int...) where {T}
	    return convert(T, getindex(get(dist_array.accessor), I...))
	end

	# Any other indexing (ranges, colons, ...) yields a collection, which must not
	# be converted to the element type; forward the accessor's result unchanged.
	function Base.getindex(dist_array::DenseArray, I...)
	    return getindex(get(dist_array.accessor), I...)
	end

	# Forward the assignment to the wrapped accessor and return whatever its
	# `setindex!` returns. Throws if no accessor is attached.
	function Base.setindex!(dist_array::DenseArray, v, I...)
	    return setindex!(get(dist_array.accessor), v, I...)
	end

	# Allocate storage shaped `dims` with element type `S` by delegating to the
	# wrapped accessor. `S` is independent of the wrapper's own element type, so
	# the method also applies when a different element type is requested.
	function Base.similar(dist_array::DenseArray, ::Type{S}, dims::Dims) where {S}
	    return similar(get(dist_array.accessor), S, dims)
	end

	# Produce one synthetic rating tuple: a random first index in 1:dim_x, a
	# random second index in 1:dim_y, and the constant value 1.0.
	parse_line() = (rand(1:dim_x), rand(1:dim_y), 1.0)

	# Build the rating list: `num_elements` tuples, each produced by `parse_line`.
	# The typed comprehension fixes the element type even when `num_elements` is 0
	# and needs no version-specific empty-array constructor.
	function load_data()
	    return Tuple{Int64, Int64, Float64}[parse_line() for _ in 1:num_elements]
	end

	# Return (largest first index + 1, largest second index + 1) over `ratings`.
	# Both maxima start from 0, so an empty input yields (1, 1).
	function get_dimension(ratings::Array{Tuple{Int64, Int64, Float64}})
	    largest_x = 0
	    largest_y = 0
	    for (x, y, _) in ratings
	        largest_x = max(largest_x, x)
	        largest_y = max(largest_y, y)
	    end
	    return largest_x + 1, largest_y + 1
	end


	# Script start: announce the run, then build the in-memory rating list.
	println("serial sgd mf starts here!")
	# `ratings` is a global here; it is handed to `train` as an argument below.
	ratings = load_data()
	println("load data done!")

	# Run SGD over `ratings` and print the summed squared error after every pass.
	#
	# Arguments:
	#   ratings        - collection whose elements are indexed as (x, y, value)
	#   step_size      - factor applied to the gradient in every update
	#   num_iterations - number of passes over `ratings`
	#
	# W and H hold one length-K column per index and start as randn() / 10.
	# Both loops are wrapped in @time, so timing lines are printed as well.
	function train(ratings, step_size, num_iterations)
	    dim_x, dim_y = get_dimension(ratings)
	    println((dim_x, dim_y))

	    # Flat buffers with key_begin = 0, wrapped first in the accessor and then
	    # in DenseArray. (Using the accessors directly, without the DenseArray
	    # wrapper, is the alternative set-up.)
	    W_accessor = DenseArrayAccessor{Float64, 2}(0, randn(K * dim_x) ./ 10, [K, dim_x])
	    W = DenseArray{Float64, 2}(W_accessor)
	    H_accessor = DenseArrayAccessor{Float64, 2}(0, randn(K * dim_y) ./ 10, [K, dim_y])
	    H = DenseArray{Float64, 2}(H_accessor)

	    # Gradient buffers reused across all updates.
	    W_grad = zeros(K)
	    H_grad = zeros(K)

	    @time for iteration = 1:num_iterations
	        @time for rating in ratings
	            # Stored indices are shifted by one to select the column.
	            x_idx, y_idx, rv = rating[1] + 1, rating[2] + 1, rating[3]
	            W_row = view(W, :, x_idx)
	            H_row = view(H, :, y_idx)
	            residual = rv - dot(W_row, H_row)
	            # Both gradients are taken before either column is modified.
	            W_grad .= -2 .* residual .* H_row
	            H_grad .= -2 .* residual .* W_row
	            W[:, x_idx] .= W_row .- step_size .* W_grad
	            H[:, y_idx] .= H_row .- step_size .* H_grad
	        end

	        # The escaped `\|\|` was not valid Julia; the operator is `||`.
	        should_evaluate = iteration % 1 == 0 || iteration == num_iterations
	        if should_evaluate
	            println("evaluate model")
	            err = 0.0
	            for rating in ratings
	                x_idx, y_idx, rv = rating[1] + 1, rating[2] + 1, rating[3]
	                pred = dot(view(W, :, x_idx), view(H, :, y_idx))
	                err += (rv - pred) ^ 2
	            end
	            println("iteration = ", iteration,
	                    " err = ", err)
	        end
	    end
	end

	# Run the training loop once with the constants defined above.
	train(ratings, step_size, num_iterations)

	# Print the type-inference report for the same call.
	@code_warntype train(ratings, step_size, num_iterations)