johnmyleswhite/gist:5248212

## gistfile1.jl
# Generate (x, y) data with a sparse set of active predictors
# prob controls the frequency of predictors having zero effect
function simulate_date(n::Integer, p::Integer, prob::Real)
  x = randn(n, p)
	beta = randn(p)
	for j in 1:p
		if rand() < prob
			beta[j] = 0.0
		end
	end
	y = x * beta
	for i in 1:n
		y[i] = y[i] + 0.01 * randn()
	end
	return x, y, beta
end

# Estimate sparse model coefficients
function forward_stagewise(x::Matrix, y::Vector, max_itr::Integer, tolerance::Real)
	# Maintain parameter space
	n, p = size(x)

	# Maintain a vector of residuals
	r = y

	# Mainain a vector of inferred coefficients
	beta = zeros(p)

	# Select a "step-size"
	# This determines digits of precision
	epsilon = 0.01

	# Monitor convergence
	old_norm, new_norm = Inf, Inf
	converged = false

	# Count iterations
	itr = 0

	# Iterate until convergence
	while !converged && itr < max_itr
		itr += 1

		# Find the predictor maximally correlated with residuals
		max_correlation = 0.0
		best_predictor = 0
		for j in 1:p
			current_correlation = abs(cor(r, x[:, j]))
			if current_correlation > max_correlation
				best_predictor = j
				max_correlation = current_correlation
			end
		end

		# Determine the coefficient update
		delta = epsilon * sign(dot(x[:, best_predictor], r))
		beta[best_predictor] = beta[best_predictor] + delta

		# Update residuals
		for i in 1:n
			r[i] = r[i] - delta * x[i, best_predictor]
		end

		# Assess convergence
		old_norm = new_norm
		new_norm = norm(r)

		# This can cycle
		if abs(old_norm - new_norm) < tolerance || old_norm < new_norm
			converged = true
		end

		# Show trace information
		if mod(itr, 100) == 0
			@printf "Iteration: %d\n" itr
			@printf "Previous Residual Norm: %0.4f\n" old_norm
			@printf "Current Residual Norm: %0.4f\n" new_norm
		end
	end

	# Return estimate coefficients
	return beta
end

srand(2)
x, y, beta = simulate_date(1_000, 100, 0.75)
beta_hat = forward_stagewise(x, y, 10_000, 1e-4)
beta_ols = inv(x'x) * x' * y

@printf "beta: %s,%s\n" beta[1:4] beta[(end - 3):end]
@printf "beta ~LASSO: %s,%s\n" beta_hat[1:4] beta_hat[(end - 3):end]
@printf "beta OLS: %s\n          %s\n" beta_ols[1:4] beta_ols[(end - 3):end]
	# Generate (x, y) data with a sparse set of active predictors
	# prob controls the frequency of predictors having zero effect
	function simulate_date(n::Integer, p::Integer, prob::Real)
	x = randn(n, p)
	beta = randn(p)
	for j in 1:p
	if rand() < prob
	beta[j] = 0.0
	end
	end
	y = x * beta
	for i in 1:n
	y[i] = y[i] + 0.01 * randn()
	end
	return x, y, beta
	end

	# Estimate sparse model coefficients
	function forward_stagewise(x::Matrix, y::Vector, max_itr::Integer, tolerance::Real)
	# Maintain parameter space
	n, p = size(x)

	# Maintain a vector of residuals
	r = y

	# Mainain a vector of inferred coefficients
	beta = zeros(p)

	# Select a "step-size"
	# This determines digits of precision
	epsilon = 0.01

	# Monitor convergence
	old_norm, new_norm = Inf, Inf
	converged = false

	# Count iterations
	itr = 0

	# Iterate until convergence
	while !converged && itr < max_itr
	itr += 1

	# Find the predictor maximally correlated with residuals
	max_correlation = 0.0
	best_predictor = 0
	for j in 1:p
	current_correlation = abs(cor(r, x[:, j]))
	if current_correlation > max_correlation
	best_predictor = j
	max_correlation = current_correlation
	end
	end

	# Determine the coefficient update
	delta = epsilon * sign(dot(x[:, best_predictor], r))
	beta[best_predictor] = beta[best_predictor] + delta

	# Update residuals
	for i in 1:n
	r[i] = r[i] - delta * x[i, best_predictor]
	end

	# Assess convergence
	old_norm = new_norm
	new_norm = norm(r)

	# This can cycle
	if abs(old_norm - new_norm) < tolerance \|\| old_norm < new_norm
	converged = true
	end

	# Show trace information
	if mod(itr, 100) == 0
	@printf "Iteration: %d\n" itr
	@printf "Previous Residual Norm: %0.4f\n" old_norm
	@printf "Current Residual Norm: %0.4f\n" new_norm
	end
	end

	# Return estimate coefficients
	return beta
	end

	srand(2)
	x, y, beta = simulate_date(1_000, 100, 0.75)
	beta_hat = forward_stagewise(x, y, 10_000, 1e-4)
	beta_ols = inv(x'x) * x' * y

	@printf "beta: %s,%s\n" beta[1:4] beta[(end - 3):end]
	@printf "beta ~LASSO: %s,%s\n" beta_hat[1:4] beta_hat[(end - 3):end]
	@printf "beta OLS: %s\n %s\n" beta_ols[1:4] beta_ols[(end - 3):end]