Logistic2
using DataFrames
using LinearAlgebra
using Plots

# Dependent variable: shot outcome, coded as Float64 (1.0 = made, 0.0 = missed).
# The functions below require y::Array{Float64,1}, so convert explicitly:
y = Vector{Float64}(df_nba[!, :SHOT_RESULT]);
# Regressors, plus a column of ones for the intercept:
X = Matrix{Float64}(df_nba[!, [:SHOT_CLOCK, :SHOT_DIST, :CLOSE_DEF_DIST]]);
X = hcat(ones(size(X,1)), X);
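# If SHOT_RESULT is still stored as the strings "made"/"missed" (as in the raw
# NBA shot-log data), recode it to 0/1 before the conversion above. A minimal
# sketch, assuming those two labels are the only values in the column:
# y = Float64.(df_nba[!, :SHOT_RESULT] .== "made");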
# Logistic function for a scalar input:
function sigma(x::Float64)
    exp(x)/(1.0 + exp(x))
end

# Logistic function for a vector input:
function sigma(x::Array{Float64,1})
    exp.(x) ./ (1.0 .+ exp.(x))
end
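# Note: exp(x)/(1 + exp(x)) overflows to Inf/Inf = NaN once x exceeds roughly
# 710 for Float64. A more robust sketch (not used below; sigma_stable is an
# illustrative name, not part of the original gist) is the algebraically
# equivalent form 1/(1 + exp(-x)), which never produces NaN:
sigma_stable(x::Float64) = 1.0 / (1.0 + exp(-x))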
# Log-likelihood of the sample for the logit model:
function log_likelihood(y::Array{Float64,1}, X::Array{Float64,2}, theta::Array{Float64,1})
    ll = 0.0
    # Loop over individuals in the sample
    for i = 1:size(X,1)
        p = sigma(transpose(X[i,:])*theta)
        ll += y[i]*log(p) + (1.0 - y[i])*log(1.0 - p)
    end
    return ll
end
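# The same log-likelihood in vectorized form (a sketch using the vector method
# of sigma above; log_likelihood_vec is an illustrative name, not in the gist):
function log_likelihood_vec(y::Array{Float64,1}, X::Array{Float64,2}, theta::Array{Float64,1})
    p = sigma(X*theta)
    return sum(y .* log.(p) .+ (1.0 .- y) .* log.(1.0 .- p))
end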
# Function to calculate the gradient of the log-likelihood of the sample:
function derivative_log_likelihood(y::Array{Float64,1}, X::Array{Float64,2}, theta::Array{Float64,1})
    grad = zeros(size(X,2))
    # Loop over individuals in the sample
    for i = 1:size(X,1)
        grad .+= (y[i] - sigma(transpose(X[i,:])*theta)) * X[i,:]
    end
    return grad
end
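# Matrix form of the same gradient (a sketch): the sum over individuals of
# (y_i - sigma(x_i'theta)) * x_i collapses to X' * (y - sigma(X*theta)).
derivative_log_likelihood_vec(y, X, theta) = transpose(X) * (y .- sigma(X*theta))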
# Function to calculate the Hessian of the log-likelihood of the sample:
function hessian_log_likelihood(y::Array{Float64,1}, X::Array{Float64,2}, theta::Array{Float64,1})
    hessian = zeros(size(X,2), size(X,2))
    # Loop over individuals in the sample
    for i = 1:size(X,1)
        p = sigma(transpose(X[i,:])*theta)
        hessian .+= -p*(1.0 - p)*(X[i,:]*transpose(X[i,:]))
    end
    return hessian
end
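# Matrix form of the same Hessian (a sketch; Diagonal comes from LinearAlgebra,
# imported above): with weights w_i = p_i*(1 - p_i), the Hessian is -X' * W * X.
function hessian_log_likelihood_vec(y, X, theta)
    p = sigma(X*theta)
    return -transpose(X) * Diagonal(p .* (1.0 .- p)) * X
end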
# Function that calculates the logit estimates using Newton-Raphson:
function nr_logit(y, X, theta_initial::Array{Float64,1}; max_iter::Int64 = 1000, tol::Float64 = 0.01)
    # Initial value for theta:
    theta_old = copy(theta_initial)
    theta_new = similar(theta_old)
    # Convergence reached?
    success_flag = 0
    # Let's store the convergence history:
    history = fill!(zeros(max_iter), NaN)
    for i = 1:max_iter
        # Newton-Raphson step: solve H*step = gradient rather than forming inv(H)
        theta_new = theta_old - hessian_log_likelihood(y, X, theta_old) \ derivative_log_likelihood(y, X, theta_old)
        diff = maximum(abs, theta_new .- theta_old)
        history[i] = diff
        if diff < tol
            success_flag = 1
            break
        end
        theta_old = theta_new
    end
    return theta_new, success_flag, history[.!isnan.(history)]
end
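# Quick sanity check on simulated data (a sketch, not part of the original gist):
# simulate 0/1 outcomes from known coefficients and verify nr_logit recovers them.
using Random
Random.seed!(1234)
n_sim = 10_000
X_sim = hcat(ones(n_sim), randn(n_sim, 2))  # intercept + two regressors
theta_true = [0.5, -1.0, 2.0]               # assumed "true" coefficients
y_sim = Float64.(rand(n_sim) .< sigma(X_sim*theta_true))
theta_sim, _, _ = nr_logit(y_sim, X_sim, zeros(3), max_iter=1000, tol=1e-6)
# theta_sim should be close to theta_true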
theta_guess = zeros(size(X,2)) # initial guess
@time theta, flag, history = nr_logit(y, X, theta_guess, max_iter=1000, tol=0.0001);
plot(history, label = "error", title = "Convergence of the Newton-Raphson algorithm")
println("Estimate for theta is $(theta)")
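# Fitted probabilities from the Newton-Raphson estimates, using the vector
# method of sigma defined above:
p_hat = sigma(X*theta)

# Cross-check against GLM.jl (a sketch, assuming the GLM package is installed;
# glm accepts a raw design matrix and response with Binomial()/LogitLink(),
# so its coefficients should match theta up to the convergence tolerance):
# using GLM
# fit_glm = glm(X, y, Binomial(), LogitLink())
# coef(fit_glm)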