@astrolitterbox
Last active March 18, 2021 15:56
Chapter 2 exercises from Statistical Rethinking
# -------------------- 2E1.
#Which of the expressions below correspond to the statement: the probability of rain on Monday?
#(1) Pr(rain)
#(2) Pr(rain|Monday)
#(3) Pr(Monday|rain)
#(4) Pr(rain, Monday)/Pr(Monday)
# A: (2) Pr(rain|Monday)
# (4) is equivalent, since Pr(rain, Monday)/Pr(Monday) = Pr(rain|Monday) by the definition of conditional probability.
# ------------------------------ 2E2.
#Which of the following statements corresponds to the expression: Pr(Monday|rain)?
#(1) The probability of rain on Monday.
#(2) The probability of rain, given that it is Monday.
#(3) The probability that it is Monday, given that it is raining.
#(4) The probability that it is Monday and that it is raining.
# A: (3) The probability that it is Monday, given that it is raining.
# -----------------------------2E3.
#Which of the expressions below correspond to the statement: the probability that it is Monday,
#given that it is raining?
#(1) Pr(Monday|rain)
#(2) Pr(rain|Monday)
#(3) Pr(rain|Monday) Pr(Monday)
#(4) Pr(rain|Monday) Pr(Monday)/Pr(rain)
#(5) Pr(Monday|rain) Pr(rain)/Pr(Monday)
#A:
#(1) Pr(Monday|rain)
#(4) also holds: by Bayes' theorem, Pr(rain|Monday) Pr(Monday)/Pr(rain) = Pr(Monday|rain)
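# A quick numerical sanity check of the identity behind (4), using made-up marginal and joint
# probabilities (the numbers below are arbitrary assumptions; only the identity matters):
p_monday      <- 1/7
p_rain        <- 0.3    # assumed marginal probability of rain
p_rain_monday <- 0.05   # assumed joint probability of (rain and Monday)
pr_monday_given_rain <- p_rain_monday / p_rain
pr_rain_given_monday <- p_rain_monday / p_monday
# option (4), Pr(rain|Monday) Pr(Monday) / Pr(rain), recovers Pr(Monday|rain):
all.equal( pr_rain_given_monday * p_monday / p_rain , pr_monday_given_rain )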
# -------------------------------2E4. "PROBABILITY DOES NOT EXIST"
#What de Finetti meant is that probability is a device for describing uncertainty from the perspective
#of an observer with limited knowledge; it has no objective reality.
#"The probability of water is 0.7": we do not know which point of the globe we will touch, but that point
#will be either water or land. All the information we have is the approximate land/water ratio, and we
#cannot specify the answer any more precisely than that.
#---------------------------------------------------------- 2M1
# define grid
p_grid <- seq( from=0 , to=1 , length.out=20 )
# define prior
prior <- rep( 1 , 20 )
get_posterior <- function(l, p) {
  # compute product of likelihood and prior
  unstd.posterior <- l * p
  # standardize the posterior, so it sums to 1
  posterior <- unstd.posterior / sum(unstd.posterior)
  return(posterior)
}
png("2M1.png")
# compute likelihood at each value in grid
#W ∼ Binomial(N, p) -- N -- total number of draws, p -- probability
likelihood1 <- dbinom(3 , size=3 , prob=p_grid )
posterior1 <- get_posterior(likelihood1, prior)
likelihood2 <- dbinom(3 , size=4 , prob=p_grid )
posterior2 <- get_posterior(likelihood2, prior)
likelihood3 <- dbinom(5 , size=7 , prob=p_grid )
posterior3 <- get_posterior(likelihood3, prior)
plot( p_grid , posterior1 , type="b" , xlab="probability of water" , ylab="posterior probability" )
lines( p_grid , posterior2 , type="b" , col="red" )
lines( p_grid , posterior3 , type="b" , col="green" )
legend( "topleft" , legend=c("1", "2", "3"),
        col=c("black", "red", "green"), lty=1, cex=0.8 )
dev.off()
#---------------------------------------------------------- 2M2
#2M2
# define grid
p_grid <- seq( from=0 , to=1 , length.out=20 )
# define prior
# prior is zero below p = 0.5 and constant above
prior <- ifelse( p_grid < 0.5 , 0 , 1 )
# reuse get_posterior() defined above
png("2M2.png")
# compute likelihood at each value in grid
#W ∼ Binomial(N, p) -- N -- total number of draws, p -- probability
likelihood1 <- dbinom(3 , size=3 , prob=p_grid )
posterior1 <- get_posterior(likelihood1, prior)
likelihood2 <- dbinom(3 , size=4 , prob=p_grid )
posterior2 <- get_posterior(likelihood2, prior)
likelihood3 <- dbinom(5 , size=7 , prob=p_grid )
posterior3 <- get_posterior(likelihood3, prior)
plot( p_grid , posterior1 , type="b" , xlab="probability of water" , ylab="posterior probability" )
lines( p_grid , posterior2 , type="b" , col="red" )
lines( p_grid , posterior3 , type="b" , col="green" )
legend( "topleft" , legend=c("1", "2", "3"),
        col=c("black", "red", "green"), lty=1, cex=0.8 )
dev.off()
# ----------------------------------------- 2M3
#Earth / Mars globes: P(land|Earth) = 1 - 0.7 = 0.3, P(land|Mars) = 1, P(Earth) = P(Mars) = 0.5
#P(Earth|land) = P(land|Earth) P(Earth) / P(land)
#              = P(land|Earth) P(Earth) / ( P(land|Earth) P(Earth) + P(land|Mars) P(Mars) )
#              = 0.3*0.5 / (0.3*0.5 + 1*0.5) ≈ 0.23
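# The same calculation in R (a short sketch; the likelihoods come straight from the problem statement):
prior_globe     <- c(0.5, 0.5)  # equal prior probability for Earth and Mars
lik_land_globe  <- c(0.3, 1.0)  # probability of observing "land" under each globe
posterior_globe <- lik_land_globe * prior_globe
posterior_globe <- posterior_globe / sum(posterior_globe)
posterior_globe[1] # P(Earth|land) ≈ 0.2308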
# ------------------------------------------------------- 2H1
lik_A <- 0.1 #probability of twins (data) given the model (A species)
lik_B <- 0.2
likelihood <- c(lik_A, lik_B)
#flat priors:
pr <- c(1, 1)
posterior <- likelihood*pr
posterior <- posterior/sum(posterior)
#0.333, 0.667
#probability that the next birth is twins: sum over species of
#P(species | first birth was twins) * P(twins | species):
prob <- lik_A*posterior[1] + lik_B*posterior[2]
prob
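#0.1667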
# -------------------------- 2M4
#Suppose you have a deck with only three cards. Each card has only two sides, and each side is either black or white.
#One card has two black sides. The second card has one black and one white side. The third card has two white sides.
#Now suppose all three cards are placed in a bag and shuffled. Someone reaches into the bag and pulls out a card and places it flat on a table.
#A black side is shown facing up, but you don't know the color of the side facing down. Show that the probability that the other side is also black is 2/3.
#Use the counting method (Section 2 of the chapter) to approach this problem. This means counting up the ways that each card could produce the observed data (a black side facing up on the table).
lik_bb <- 2 # the B/B card can show a black face up in 2 ways
lik_bw <- 1 # the B/W card in 1 way
lik_ww <- 0 # the W/W card in 0 ways
likelihood <- c(lik_bb, lik_bw, lik_ww)
prior <- c(1, 1, 1)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)
posterior[1]
#0.667
# -------------------------- 2M5
#Now suppose there are four cards: B/B, B/W, W/W, and another B/B. Again suppose a card is
#drawn from the bag and a black side appears face up. Again calculate the probability that the other
#side is black.
lik_bb <- 2
lik_bw <- 1
lik_ww <- 0
likelihood <- c(lik_bb, lik_bw, lik_ww, lik_bb)
prior <- c(1, 1, 1, 1)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)
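# the answer is the total posterior mass on the two B/B cards
posterior[1] + posterior[4]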
#0.8
#----------------------------- 2M6
#Imagine that black ink is heavy, and so cards with black sides are heavier than cards with white
#sides. As a result, it’s less likely that a card with black sides is pulled from the bag. So again assume
#there are three cards: B/B, B/W, and W/W. After experimenting a number of times, you conclude that
#for every way to pull the B/B card from the bag, there are 2 ways to pull the B/W card and 3 ways to
#pull the W/W card. Again suppose that a card is pulled and a black side appears face up. Show that
#the probability the other side is black is now 0.5. Use the counting method, as before.
lik_bb <- 2
lik_bw <- 1
lik_ww <- 0
likelihood <- c(lik_bb, lik_bw, lik_ww)
prior <- c(1, 2, 3)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)
posterior[1]
#0.5
#----------------------------- 2M7
#Assume again the original card problem, with a single card showing a black side face up. Before
#looking at the other side, we draw another card from the bag and lay it face up on the table. The face
#that is shown on the new card is white. Show that the probability that the first card, the one showing
#a black side, has black on its other side is now 0.75. Use the counting method, if you can. Hint: Treat
#this like the sequence of globe tosses, counting all the ways to see each observation, for each possible
#first card.
lik_bb <- 2 * 3 # B/B first card: 2 ways to show black, then 3 white faces left in the bag (B/W's white side + both sides of W/W)
lik_bw <- 1 * 2 # B/W first card: 1 way to show black, then 2 white faces left (both sides of W/W)
lik_ww <- 0     # W/W first card cannot show black
likelihood <- c(lik_bb, lik_bw, lik_ww)
prior <- c(1, 1, 1)
posterior <- prior * likelihood
posterior <- posterior / sum(posterior)
posterior[1]
#0.75
# ------------------------------------------------------- 2H2
#2H2. Recall all the facts from the problem above. Now compute the probability that the panda we
#have is from species A, assuming we have observed only the first birth and that it was twins.
#Q: find P(A|twins):
#We have actually already calculated it -- it's posterior[1] in 2H1 above.
#P(A|T) = P(T|A) P(A) / P(T) = P(T|A) P(A) / ( P(T|A) P(A) + P(T|B) P(B) ) = 0.1*0.5/(0.1*0.5 + 0.2*0.5) ≈ 0.333
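# The same number as a one-line check (values taken directly from the problem statement):
(0.1*0.5) / (0.1*0.5 + 0.2*0.5)
#0.333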
# ---------------------------------------------------------- 2H3
#2H3. Continuing on from the previous problem, suppose the same panda mother has a second birth
#and that it is not twins, but a singleton infant. Compute the posterior probability that this panda is
#species A.
lik_TSA <- 0.1*(1-0.1) #likelihood of giving birth to twins and a single cub for A
lik_TSB <- 0.2*(1-0.2)
#same flat priors as in 2H1 (pr defined above)
likelihood <- c(lik_TSA, lik_TSB)
posterior <- likelihood * pr
posterior <- posterior / sum(posterior)
posterior[1]
#0.36
# ------------------------------------------------------------ 2H4
# 2H4. A common boast of Bayesian statisticians is that Bayesian inference makes it easy to use all of the data, even if the data are of different types.
# So suppose now that a veterinarian comes along who has a new genetic test that she claims can identify the species of our mother panda. But the test, like all tests, is imperfect. This is the information you have about the test:
# The probability it correctly identifies a species A panda is 0.8.
# The probability it correctly identifies a species B panda is 0.65.
# The vet administers the test to your panda and tells you that the test is positive for species A. First ignore your previous information from the births and compute the posterior probability that your panda is species A.
#Then redo your calculation, now using the birth data as well.
#P(A) = P(B) = 0.5; P(test says A | A) = 0.8, P(test says A | B) = 1 - 0.65 = 0.35
#P(A | test says A) = P(test says A | A) P(A) / ( P(test says A | A) P(A) + P(test says A | B) P(B) )
p_ap <- (0.5*0.8)/((0.5*0.8) + (0.5*0.35))
p_ap
#0.6957
#Using the birth data as well: take the test posterior as the new prior and update it with the
#twins-then-singleton birth likelihood:
#P(A | TS, test) = P(TS|A) P(A|test) / ( P(TS|A) P(A|test) + P(TS|B) P(B|test) )
#                = (0.1*0.9*0.6957) / (0.1*0.9*0.6957 + 0.2*0.8*0.3043)
#likelihood of T, S sequence for both species:
lik_TSA <- 0.1*(1-0.1)
lik_TSB <- 0.2*(1-0.2)
likelihood <- c(lik_TSA, lik_TSB)
prior <- c(p_ap, (1-p_ap)) # posterior from the genetic test used as the prior for the birth data
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)
posterior[1]
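#0.5625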