# --- lasso.jl (file boundaries inferred from the include() calls in the main
# script below; the code targets Julia 0.6, hence `type`, `srand`, `linspace`)
type Lasso <: Model
    w::AA{Float64, 1}
    b::Float64
    λ::Float64   # L1 regularization strength
    η::Float64   # learning rate
    Lasso(;λ = 1.0, η = 0.01) = new(zeros(Float64, 0), 0., λ, η)
end
# Proximal operator of the L1 norm (soft-thresholding):
# proxₗ₁(λ, w)ᵢ = sign(wᵢ) * max(|wᵢ| - λ, 0)
proxₗ₁(λ, w) = sign.(w) .* max.(abs.(w) .- λ, 0)
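# A quick numeric check of the soft-thresholding operator (not part of the
# original gist): proxₗ₁(1.0, [3.0, -0.5, 2.0]) returns [2.0, 0.0, 1.0].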
function update!{T <: Real}(model::Lasso, X::AA{Float64, 2}, y::AA{T, 1})
    # One stochastic pass over the samples: a gradient step followed by the
    # proximal step. Textbook ISTA soft-thresholds by η·λ (step size times
    # penalty), and the bias is conventionally left unpenalized, so only w
    # goes through proxₗ₁.
    for i in 1:size(X, 2)
        Δw, Δb = ∇L(model, view(X, :, i), y[i])
        model.w = proxₗ₁(model.η * model.λ, model.w - model.η * Δw)
        model.b = model.b - model.η * Δb
    end
    return model
end
function init_weights!(model::Lasso, X::AA{Float64, 2})
    model.w = zeros(size(X, 1))
    model.b = 0.0
    return model
end
function fit!{T <: Real}(model::Lasso, X::AA{Float64, 2}, y::AA{T, 1};
                         n_iter = 200)
    @assert size(X, 2) == size(y, 1)  # samples are stored column-wise
    init_weights!(model, X)
    for i in 1:n_iter
        model = update!(model, X, y)
    end
    return model
end
# Gradient of the squared loss with respect to (w, b) for a single sample.
function ∇L{T <: Real}(model::Lasso, x::AA{Float64, 1}, y::T)
    yₚ = predict(model, x)
    Δ = yₚ - y
    return x * Δ, Δ
end
# Batch gradient: the average of the per-sample gradients.
function ∇L{T <: Real}(model::Lasso, X::AA{Float64, 2}, y::AA{T, 1})
    n_features, n_samples = size(X)
    Δw = zeros(n_features)
    Δb = 0.0
    for i in 1:n_samples
        Δwᵢ, Δbᵢ = ∇L(model, view(X, :, i), y[i])
        Δw += Δwᵢ
        Δb += Δbᵢ
    end
    return Δw / n_samples, Δb / n_samples
end
predict(model::Lasso, x::AA{Float64, 1}) = dot(model.w, x) + model.b
predict(model::Lasso, X::AA{Float64, 2}) = vec(model.w' * X .+ model.b)
# --- main.jl
# The wine data is available at https://archive.ics.uci.edu/ml/datasets/Wine+Quality
using CSV

srand(1234)

const AA = AbstractArray

include("model.jl")
include("utils.jl")
include("lasso.jl")
include("visualization.jl")
include("datasets/wine.jl")
include("datasets/make_regression.jl")
# Grid-search the learning rate η for a fixed λ and keep the model with the
# lowest training error.
function search_param(X::AA{Float64, 2}, y::AA{Float64, 1}, λ::Float64)
    min_error = Inf
    argmin_η = 0.0
    argmin_model = nothing
    for η in [1e-6, 5e-6, 1e-5, 5e-5, 1e-4]
        model = Lasso(λ = λ, η = η)
        model = fit!(model, X, y, n_iter = 200)
        E = mean_squared_error(y, predict(model, X))
        if E < min_error
            min_error = E
            argmin_η = η
            argmin_model = model
        end
    end
    return min_error, argmin_η, argmin_model
end
X, y = make_regression(400, [4.0, 0.0], 0.8; noise = 3.0)
for λ in [0.0, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4, 5e-4]
    min_error, argmin_η, model = search_param(X, y, λ)
    println("")
    println("λ = $λ η = $argmin_η")
    println("error = $min_error")
    println(model.w)
    println(model.b)
    plot_regression_line(model, X, y)
    clf()
    plot_regression_surface(model, X, y)
    clf()
end
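# datasets/make_regression.jl is not included in the gist. Below is a minimal
# sketch consistent with the call above; the argument meanings (sample count,
# true weights, true bias) and the `noise` keyword (std of Gaussian noise) are
# assumptions, not the author's original code.
function make_regression(n_samples::Int, w::AA{Float64, 1}, b::Float64;
                         noise = 1.0)
    n_features = length(w)
    X = randn(n_features, n_samples)                   # features × samples
    y = vec(w' * X) .+ b .+ noise .* randn(n_samples)  # linear model + noise
    return X, y
end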
# --- model.jl
abstract type Model end
# --- test script for utils.jl (the file name is not shown in the gist)
using Base.Test
import Base.error

const AA = AbstractArray
include("utils.jl")
function test_mean_std_normalization()
    X = Float64[
        3 4 -1 3 0;
        9 1 6 -8 1;
        3 9 0 -4 -4;
        5 -1 9 7 -1;
    ]
    X = mean_std_normalization(X)
    # each column should now have (near-)zero mean and unit std
    for x in mean(X, 1)
        @test abs(x) ≈ 0.0 atol=1e-10
    end
    for x in std(X, 1)
        @test abs(x) ≈ 1.0 atol=1e-10
    end
end
function test_error()
    yₜ = [1, 2, 1, 3, 4]
    yₚ = [1, 3, 1, 1, 3]
    @test error(yₜ, yₚ) == 1.2  # mean squared error: (0 + 1 + 0 + 4 + 1) / 5
end
test_mean_std_normalization()
test_error()
# --- utils.jl
function meshgrid(xs1::AA{Float64, 1}, xs2::AA{Float64, 1})
    return ([x₁ for x₁=xs1, x₂=xs2], [x₂ for x₁=xs1, x₂=xs2])
end
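# A quick check of meshgrid (not part of the original gist):
# meshgrid([1., 2.], [3., 4., 5.]) returns the pair of 2×3 matrices
#   [1 1 1; 2 2 2] and [3 4 5; 3 4 5]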
function train_test_split(X, y; train_ratio = 0.8)
    @assert size(X, 2) == size(y, 1)
    N = size(X, 2)
    n_train = Int(round(N * train_ratio))
    indices = shuffle(1:N)
    train_indices = indices[1:n_train]
    test_indices = indices[n_train+1:end]
    X_train, X_test = X[:, train_indices], X[:, test_indices]
    y_train, y_test = y[train_indices], y[test_indices]
    return X_train, X_test, y_train, y_test
end
# Normalize each column of X to zero mean and unit standard deviation.
function mean_std_normalization(X::AA{Float64, 2})
    X = X .- mean(X, 1)
    X = X ./ std(X, 1)
    return X
end
function mean_squared_error{T, U <: Real}(yₜ::AA{T, 1}, yₚ::AA{U, 1})
    @assert length(yₜ) == length(yₚ)
    N = length(yₜ)
    return dot(yₜ - yₚ, yₜ - yₚ) / N
end
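# The test script above imports Base.error and checks error(yₜ, yₚ) == 1.2,
# which is exactly the mean squared error of those vectors, so utils.jl most
# likely defines error along these lines (a sketch; the actual definition is
# missing from the gist, and the test's `import Base.error` is what lets this
# extend Base.error):
error{T, U <: Real}(yₜ::AA{T, 1}, yₚ::AA{U, 1}) = mean_squared_error(yₜ, yₚ)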
# --- visualization.jl
using PyPlot

# Scatter each feature against y, then overlay the regression line obtained by
# sweeping that feature while holding the remaining features at zero.
function plot_regression_line(model::Model,
                              X::AA{Float64, 2}, y::AA{Float64, 1})
    N = 201
    n_features = size(X, 1)
    for i in 1:n_features
        subplot(n_features, 1, i)
        u = view(X, i, :)
        scatter(u, y)
        # draw the regression line
        Xₚ = zeros(n_features, N)
        v = linspace(minimum(u), maximum(u), N)
        Xₚ[i, :] = v
        plot(v, predict(model, Xₚ))
    end
    show()
end
function plot_regression_surface(model::Model,
                                 X::AA{Float64, 2}, y::AA{Float64, 1})
    @assert size(X, 1) == 2  # a surface plot needs exactly two features
    x1, x2 = view(X, 1, :), view(X, 2, :)
    scatter3D(x1, x2, y)
    N = 201
    L₁, H₁ = minimum(x1), maximum(x1)
    L₂, H₂ = minimum(x2), maximum(x2)
    xs1 = linspace(L₁, H₁, N)
    xs2 = linspace(L₂, H₂, N)
    X₁, X₂ = meshgrid(xs1, xs2)
    Y = [predict(model, [x₁, x₂]) for x₁=xs1, x₂=xs2]
    plot_surface(X₁, X₂, Y)
    show()
end
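# datasets/wine.jl is also missing from the gist; the main script loads CSV.jl
# for it. A minimal Base-only sketch for the red-wine file (the UCI files are
# ';'-separated with a header row and the quality score in the last column);
# the function name load_wine is made up here:
function load_wine(path = "winequality-red.csv")
    data = readdlm(path, ';'; skipstart = 1)  # drop the header row
    X = data[:, 1:end-1]'  # features × samples, matching the rest of the code
    y = vec(data[:, end])  # quality scores
    return X, y
end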