# alstat/code-turing.jl

## code-turing.jl
using Turing, Distributions

# Import RDatasets.
using RDatasets

# Import MCMCChain, Plots, and StatPlots for visualizations and diagnostics.
using MCMCChain, Plots, StatPlots

# MLDataUtils provides a sample splitting tool that's very handy.
using MLDataUtils

# Set a seed for reproducibility.
using Random
# Seed the global RNG with 0 so repeated runs are reproducible.
Random.seed!(0);

# Hide the progress prompt while sampling.
Turing.turnprogress(false);

# Load the "mtcars" dataset from the "datasets" package via RDatasets.
data = RDatasets.dataset("datasets", "mtcars");

# Show the first six rows of the dataset.
head(data, 6)

# Split our dataset 70%/30% into training/test sets.
train, test = MLDataUtils.splitobs(data, at = 0.7);

# Save dataframe versions of our dataset.
# NOTE(review): train_cut/test_cut are not referenced again in the visible
# part of this script.
train_cut = DataFrame(train)
test_cut = DataFrame(test)

# Create our labels. These are the values we are trying to predict
# (the :MPG column).
train_label = train[:, :MPG]
test_label = test[:, :MPG]

# Collect the names of every column except :MPG and :Model. Despite the
# variable's name, these are the columns that are KEPT as predictors below.
remove_names = filter(x->!in(x, [:MPG, :Model]), names(data))

# Keep only those columns in the train and test sets and convert both to
# matrices.
train = Matrix(train[:,remove_names]);
test = Matrix(test[:,remove_names]);

# Rescale `x` so that, along its first dimension (i.e. per column for a
# matrix), the values have zero mean and unit sample standard deviation.
# Returns the tuple `(scaled, x)`: the rescaled data followed by the untouched
# input, which can later be handed to `unstandardize` as `orig`.
function standardize(x)
    center = mean(x, dims=1)
    spread = std(x, dims=1)
    scaled = (x .- center) ./ spread
    return scaled, x
end

# Map standardized values `x` back onto the scale of `orig`: multiply by the
# standard deviation of `orig` and add the mean of `orig`, both taken along
# the first dimension (the same statistics `standardize` divides/subtracts).
function unstandardize(x, orig)
    scale = std(orig, dims=1)
    shift = mean(orig, dims=1)
    return x .* scale .+ shift
end

# Standardize our dataset.
# Each call returns (scaled, original): the scaled data replaces the variable
# and the original values are kept in the matching *_orig variable.
# NOTE(review): the test features and labels are scaled with their own
# mean/std rather than the training set's — confirm this is intended.
(train, train_orig) = standardize(train)
(test, test_orig) = standardize(test)
(train_label, train_l_orig) = standardize(train_label)
(test_label, test_l_orig) = standardize(test_label);

# Bayesian linear regression.
#
# Model: y[i] ~ Normal(intercept + (x * coefficients)[i], sqrt(σ₂)).
#
# Arguments:
#   x      - predictors; the caller in this script passes a matrix whose size
#            is (n_obs, n_vars).
#   y      - observed responses, indexed 1:n_obs.
#   n_obs  - number of observations looped over in the likelihood.
#   n_vars - number of regression coefficients.
@model linear_regression(x, y, n_obs, n_vars) = begin
    # Prior on the noise VARIANCE: Normal(0, 100) truncated to [0, Inf).
    σ₂ ~ TruncatedNormal(0, 100, 0, Inf)

    # Prior on the intercept.
    intercept ~ Normal(0, 3)

    # Priors on the coefficients: one Normal(0, 10) per entry of the vector.
    coefficients = Array{Real}(undef, n_vars)
    coefficients ~ [Normal(0, 10)]

    # Linear predictor for every observation.
    mu = intercept .+ x * coefficients

    # `Normal` takes a standard deviation as its second parameter, so convert
    # the variance before using it. Passing σ₂ directly would silently treat
    # the variance as a standard deviation.
    σ = sqrt(σ₂)
    for i in 1:n_obs
        y[i] ~ Normal(mu[i], σ)
    end
end;

# Build the model on the standardized training data and sample from it.
n_obs, n_vars = size(train)
model = linear_regression(train, train_label, n_obs, n_vars)
# NOTE(review): the NUTS arguments are presumably (iterations, adaptation
# steps, target acceptance rate) — confirm against the installed Turing version.
chain = sample(model, NUTS(1500, 200, 0.65));
	using Turing, Distributions

	# Import RDatasets.
	using RDatasets

	# Import MCMCChain, Plots, and StatPlots for visualizations and diagnostics.
	using MCMCChain, Plots, StatPlots

	# MLDataUtils provides a sample splitting tool that's very handy.
	using MLDataUtils

	# Set a seed for reproducibility.
	using Random
	# Seed the global RNG with 0 so repeated runs are reproducible.
	Random.seed!(0);

	# Hide the progress prompt while sampling.
	Turing.turnprogress(false);

	# Load the "mtcars" dataset from the "datasets" package via RDatasets.
	data = RDatasets.dataset("datasets", "mtcars");

	# Show the first six rows of the dataset.
	head(data, 6)

	# Split our dataset 70%/30% into training/test sets.
	train, test = MLDataUtils.splitobs(data, at = 0.7);

	# Save dataframe versions of our dataset.
	# NOTE(review): train_cut/test_cut are not referenced again in the visible
	# part of this script.
	train_cut = DataFrame(train)
	test_cut = DataFrame(test)

	# Create our labels. These are the values we are trying to predict
	# (the :MPG column).
	train_label = train[:, :MPG]
	test_label = test[:, :MPG]

	# Collect the names of every column except :MPG and :Model. Despite the
	# variable's name, these are the columns that are KEPT as predictors below.
	remove_names = filter(x->!in(x, [:MPG, :Model]), names(data))

	# Keep only those columns in the train and test sets and convert both to
	# matrices.
	train = Matrix(train[:,remove_names]);
	test = Matrix(test[:,remove_names]);

	# Rescale `x` so that, along its first dimension (i.e. per column for a
	# matrix), the values have zero mean and unit sample standard deviation.
	# Returns the tuple `(scaled, x)`: the rescaled data followed by the untouched
	# input, which can later be handed to `unstandardize` as `orig`.
	function standardize(x)
	    center = mean(x, dims=1)
	    spread = std(x, dims=1)
	    scaled = (x .- center) ./ spread
	    return scaled, x
	end

	# Map standardized values `x` back onto the scale of `orig`: multiply by the
	# standard deviation of `orig` and add the mean of `orig`, both taken along
	# the first dimension (the same statistics `standardize` divides/subtracts).
	function unstandardize(x, orig)
	    scale = std(orig, dims=1)
	    shift = mean(orig, dims=1)
	    return x .* scale .+ shift
	end

	# Standardize our dataset.
	# Each call returns (scaled, original): the scaled data replaces the variable
	# and the original values are kept in the matching *_orig variable.
	# NOTE(review): the test features and labels are scaled with their own
	# mean/std rather than the training set's — confirm this is intended.
	(train, train_orig) = standardize(train)
	(test, test_orig) = standardize(test)
	(train_label, train_l_orig) = standardize(train_label)
	(test_label, test_l_orig) = standardize(test_label);

	# Bayesian linear regression.
	#
	# Model: y[i] ~ Normal(intercept + (x * coefficients)[i], sqrt(σ₂)).
	#
	# Arguments:
	#   x      - predictors; the caller in this script passes a matrix whose size
	#            is (n_obs, n_vars).
	#   y      - observed responses, indexed 1:n_obs.
	#   n_obs  - number of observations looped over in the likelihood.
	#   n_vars - number of regression coefficients.
	@model linear_regression(x, y, n_obs, n_vars) = begin
	    # Prior on the noise VARIANCE: Normal(0, 100) truncated to [0, Inf).
	    σ₂ ~ TruncatedNormal(0, 100, 0, Inf)

	    # Prior on the intercept.
	    intercept ~ Normal(0, 3)

	    # Priors on the coefficients: one Normal(0, 10) per entry of the vector.
	    coefficients = Array{Real}(undef, n_vars)
	    coefficients ~ [Normal(0, 10)]

	    # Linear predictor for every observation.
	    mu = intercept .+ x * coefficients

	    # `Normal` takes a standard deviation as its second parameter, so convert
	    # the variance before using it. Passing σ₂ directly would silently treat
	    # the variance as a standard deviation.
	    σ = sqrt(σ₂)
	    for i in 1:n_obs
	        y[i] ~ Normal(mu[i], σ)
	    end
	end;

	# Build the model on the standardized training data and sample from it.
	n_obs, n_vars = size(train)
	model = linear_regression(train, train_label, n_obs, n_vars)
	# NOTE(review): the NUTS arguments are presumably (iterations, adaptation
	# steps, target acceptance rate) — confirm against the installed Turing version.
	chain = sample(model, NUTS(1500, 200, 0.65));