Skip to content

Instantly share code, notes, and snippets.

@evgenyneu
Created May 21, 2020 05:09
Show Gist options
  • Save evgenyneu/f4961bae1a48279b80be603b8b0649c0 to your computer and use it in GitHub Desktop.
Save evgenyneu/f4961bae1a48279b80be603b8b0649c0 to your computer and use it in GitHub Desktop.
Calculate probability distribution of black marbles in a bag, given observed proportion in the sample form the bag
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
def find_posterior(M, N, p, p_hpdi):
"""
We have a bag containing black and white marbles, `M` in total.
We don't know how many black marbles are in the bag.
We take `N` from the bag and count the proportion of black marbles `p`.
Note after taking a marble, we keep it, we do not return it back
into the bag.
This function estimates probability distribution of the proportion
of black marbles in the bag.
Parameters
--------
M: int
Total number of marbles.
N: int
Number of marbles drawn from the bag
(without replacement, i.e. we keep each marble and not return it
back into the bag).
p: float
Proportion of black marbles drawn from the bag
(i.e. observed success rate).
p_hpdi: float
The probability of the HPDI interval we want to calculate
from the distribution.
"""
# Number of black marbles that we drawn (i.e. number of successes)
k = round(p * N)
# Grid containing different possibilities of the
# number of black marbles in the bag: i.e. n = 0, 1, 2, ..., M
n_grid = np.arange(0, M + 1)
# Calculate likelihoods of observing the data
# for different number of black marbles in the bag: n = 0, 1, 2, ... M
likelihood = [
stats.hypergeom.pmf(M=M, n=n, N=N, k=k)
for n in n_grid
]
# Normalise the likelihood to get probability distribution that sums to 1
posterior = likelihood / np.sum(likelihood)
# Grid containing the proportion of successes (black marbles)
p_grid = n_grid / M
# Mode
mode = p_grid[np.argmax(posterior)]
mode_str = f'Mode = {mode:.3f}'
print(mode_str)
# Calculate HPDI
hpdi = find_hpdi(probability_density=posterior, p=p_hpdi)
p_inteval = (p_grid[hpdi[0]], p_grid[hpdi[1]])
hpdi_label = f'{p_hpdi * 100:.1f}% HPDI'
interval_str = f'{hpdi_label} = ({p_inteval[0]:.3f}, {p_inteval[1]:.3f})'
print(interval_str)
# Plot the posterior distribution, its mode and HPDI
plt.plot(p_grid, posterior)
x_fill = p_grid[hpdi[0]: hpdi[1]]
y_fill = posterior[hpdi[0]: hpdi[1]]
plt.fill_between(x_fill, y_fill, color="#5533BB70", label=hpdi_label)
plt.axvline(x=mode, color='red', label="Mode")
plt.xlabel("Proportion of black marbles")
plt.ylabel("Probability density")
plt.legend()
plt.grid()
plt.title((
f"Posterior probability distribution of black marbles\n"
f"{mode_str}, {interval_str}"
))
plt.tight_layout()
plt.show()
def find_hpdi(probability_density, p=0.6827):
"""
Calculate HPDI (highest posterior density interval) that contains
`p` probability given probability density. HPDI is the narrowest
interval that contain the given `p` probability.
Parameters
----------
probability_density: list of float
Probability density values, the sum needs to be 1.
p: float
Probability for the interval. Default value `0.6827` corresponds
to traditional 1-sigma probability interval.
Returns
-------
(low, highi):
Returns the lower and high indices of HPDI interval from
`probability_density` list.
"""
# Most narrow interval containing the probability p
narrow_interval = (0, len(probability_density) - 1)
previous_effective_width = len(probability_density)
for i_left in range(len(probability_density) - 1):
prob_sum = probability_density[i_left]
for i_right in range(i_left + 1, len(probability_density)):
prob_sum += probability_density[i_right]
if (prob_sum > p):
current_width = i_right - i_left
effective_width = p * current_width / prob_sum
if effective_width < previous_effective_width:
previous_effective_width = effective_width
narrow_interval = (i_left, i_right)
break
return narrow_interval
if __name__ == "__main__":
find_posterior(M=250, N=75, p=0.93, p_hpdi=0.6827)
print("We are done")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment