Last active
August 29, 2015 13:57
-
-
Save florianhartig/9870515 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Analysis of likelihood, p-value, and Bayes for a binomial model:
# 10 trials, 3 successes, unknown coin -- we want to do inference on p.
# NOTE: removed `rm(list = ls(all = TRUE))` -- wiping the caller's global
# environment from inside a script is an anti-pattern; run the script in a
# fresh R session instead.
trials <- 10   # number of coin flips observed
success <- 3   # number of heads observed
# GETTING THE MLE ESTIMATE
# Candidate values for p (probability of heads) on a fine grid
parametervalues <- seq(0, 1, by = 0.001)
# Likelihood of the observed data at every candidate parameter value
likelihood <- dbinom(success, trials, prob = parametervalues)
# Visualize the likelihood profile over the grid
plot(parametervalues, likelihood, type = "l")
# The MLE is simply the grid point where the likelihood is largest
MLEEstimate <- parametervalues[which.max(likelihood)]
abline(v = MLEEstimate, col = "red")
# GETTING THE P-VALUE FOR A FAIR COIN
# One-tailed p-value for a smaller-or-equal result given a fair coin:
# p(k <= k_obs | H0: p = 0.5).
# pbinom() gives this cumulative probability directly; it is equivalent to
# the hand-rolled sum(dbinom(success:0, trials, prob = 0.5)).
pValue <- pbinom(success, trials, prob = 0.5)
pValue
pValue < 0.05
# note that there is not one p-value, but many, because there are many
# possible choices for the null hypothesis -- for example, what would you do
# if your null hypothesis was that the coin has a 0.8 probability of heads?
# GETTING A BAYESIAN ESTIMATE
# Remember: for Bayes, p(M|D) = p(D|M) * p(M) / p(D), and p(D) is just the
# integral of p(D|M) * p(M) over all M.
# p(D|M) is the likelihood computed above, so we only need the prior p(M).
# Flat prior: p(M) = 1 everywhere.
# See http://en.wikipedia.org/wiki/Jeffreys_prior (section on the Bernoulli
# trial) to understand that this is not necessarily the best uninformative
# choice, but it is simple at any rate.
# FIX: derive the prior length from parametervalues instead of the
# hard-coded 1001, so the code stays correct if the grid resolution changes.
prior <- rep(1, length(parametervalues))
plot(parametervalues, prior)
# Normalize to a density on the grid (divide by the Riemann sum; the factor
# length(parametervalues) - 1 converts the grid sum into an integral over [0, 1])
posterior <- likelihood * prior / sum(likelihood * prior) * (length(parametervalues) - 1)
plot(parametervalues, posterior, col = "darkgreen", type = "l")
lines(parametervalues, likelihood)
lines(parametervalues, prior, col = "red")
# FIX: legend color for the posterior now matches the plotted "darkgreen"
legend("topright", c("likelihood", "prior", "posterior"),
       col = c("black", "red", "darkgreen"), lwd = 1)
# You see that likelihood and posterior have the same shape.
# However, this is only because I chose a flat prior.
# There is still a difference, namely that the posterior is normalized,
# i.e. it will integrate to one. It has to be, because we want to interpret
# it as a pdf, while the likelihood is not a pdf.
# Let's look at the same example with an informative prior:
prior <- dnorm(parametervalues, mean = 0.5, sd = 0.1)
plot(parametervalues, prior)
# Same normalization as before: posterior as a density on the [0, 1] grid
posterior <- likelihood * prior / sum(likelihood * prior) * (length(parametervalues) - 1)
plot(parametervalues, posterior, col = "darkgreen", type = "l")
lines(parametervalues, likelihood)
lines(parametervalues, prior, col = "red")
# FIX: legend color for the posterior now matches the plotted "darkgreen"
legend("topright", c("likelihood", "prior", "posterior"),
       col = c("black", "red", "darkgreen"), lwd = 1)
# You can see that the likelihood moves the posterior away from the prior,
# but not by much. Try the same thing with more data at the same ratio,
# i.e. change to 30 trials, 9 successes.
# Confidence measures
# MLE
# MLE confidence intervals are constructed by comparing likelihood values of
# single parameters against the maximum. By the likelihood-ratio test, twice
# the log-likelihood difference to the MLE is asymptotically chi-squared with
# 1 degree of freedom, so the 95% CI lies where the log-likelihood has dropped
# by qchisq(0.95, 1) / 2 (approximately 1.92). The test statistics are
# discussed, e.g., in https://onlinecourses.science.psu.edu/stat504/node/39.
# FIX: the original used 1.96, which is the 97.5% NORMAL quantile, not the
# correct log-likelihood drop of qchisq(0.95, 1) / 2 = 1.92.
confidence.level <- log(max(likelihood)) - qchisq(0.95, 1) / 2
# Left bound: grid point at or left of the MLE whose log-likelihood is
# closest to the cutoff
leftCI <- parametervalues[which.min(abs(log(likelihood[1:which.max(likelihood)]) - confidence.level))]
abline(v = leftCI)
# Right bound: same search on the right-hand tail; the which.max offset maps
# the position within the subvector back to an index on the full grid
rightCI <- parametervalues[which.min(abs(log(likelihood[which.max(likelihood):length(likelihood)]) - confidence.level)) + which.max(likelihood) - 1]
abline(v = rightCI)
# Bayesian credible interval calculated via the central 95% posterior
# probability (the original comment said "Bayesian MLE", a misnomer).
# Cumulative posterior; dividing by (length - 1) undoes the density scaling
# so cumPost runs from ~0 to 1.
cumPost <- cumsum(posterior) / (length(posterior) - 1)
leftCI <- parametervalues[which.min(abs(cumPost - 0.025))]
abline(v = leftCI, col = "darkgreen", lty = 2, lwd = 2)
# FIX: the upper (97.5%) bound was assigned to leftCI in the original,
# overwriting the lower bound; it is the right bound.
rightCI <- parametervalues[which.min(abs(cumPost - 0.975))]
abline(v = rightCI, col = "darkgreen", lty = 2, lwd = 2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment