Estimate the mean and variance of a Normal distribution
from dataclasses import dataclass

import numpy as np


def bayesian_update(x: np.ndarray,
                    k: int, mu: float, alpha: float, beta: float):
    """
    Graphical model is like this:
        tau ~ IGa(alpha, beta)
        mu ~ N(mu_0, tau / k)
        x_i ~ N(mu, tau)
    Where IGa is the Inverse Gamma distribution, N is the Normal
    distribution (parameterized by mean and variance), and mu_0 is the
    incoming mean estimate.
    Refer to
        "Conjugate Bayesian analysis of the Gaussian distribution" by
        Kevin P. Murphy
    Implemented by K.R. Zentner
    Args:
        x (np.ndarray): The samples to update the estimate with.
        k (int): The number of samples encountered so far.
        mu (float): The current estimate of the mean.
        alpha (float): The "shape" parameter of the Inverse Gamma
            distribution estimating the variance.
        beta (float): The "scale" parameter of the Inverse Gamma
            distribution estimating the variance.
    """
    x_bar = np.mean(x)
    n = len(x)
    mu_ = (n * x_bar + k * mu) / (n + k)
    alpha_ = alpha + (n / 2.)
    # The second term is zero when n == 1.
    beta_ = (beta +
             0.5 * np.sum((x - x_bar) ** 2) +
             (k * n * (x_bar - mu) ** 2) /
             (2 * (k + n)))
    return n + k, mu_, alpha_, beta_
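
# A quick sanity check, not part of the original gist: because this is a
# conjugate update, feeding in all the samples at once should produce
# exactly the same posterior as feeding them in one at a time. The toy
# values below are arbitrary.
_xs = np.array([1., 2., 3., 4.])
_batch = bayesian_update(_xs, 1, 0., 1., 2.)
_seq = (1, 0., 1., 2.)
for _x in _xs:
    _seq = bayesian_update(np.array([_x]), *_seq)
assert np.allclose(_batch, _seq)
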
@dataclass
class NGEstimator:

    # The number of samples encountered so far.
    k: int = 0

    # The estimated mean.
    mu: float = 0.

    # The parameters for computing the estimated variance.
    # Both increase continuously during sampling.
    # Refer to the Wikipedia page on the Inverse Gamma distribution for help
    # in setting starting values.
    alpha: float = 1.
    beta: float = 2.

    def update(self, x):
        n, mu, alpha, beta = bayesian_update(
            x, self.k, self.mu, self.alpha, self.beta)
        self.k = n
        self.mu = mu
        self.alpha = alpha
        self.beta = beta

    def var(self, n=1):
        # We use numpy's Gamma distribution instead of scipy.invgamma.
        # Note this requires taking the reciprocal of the samples (and
        # taking the reciprocal of beta, since numpy parameterizes by
        # scale rather than rate).
        return 1 / (np.random.gamma(self.alpha,
                                    scale=1/self.beta,
                                    size=n))

    def mean(self, n=1):
        return np.repeat(self.mu, n)

    def sample(self, n=1):
        variances = self.var(n)
        return np.random.normal(self.mu, np.sqrt(variances))
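
# Hedged aside, not in the original gist: var() estimates the variance by
# Monte Carlo sampling. When alpha > 1, the posterior mean of an
# Inverse Gamma(alpha, beta) variable is also available in closed form as
# beta / (alpha - 1), which avoids the sampling noise. point_var below is
# a hypothetical helper illustrating this.
def point_var(estimator: NGEstimator) -> float:
    # Posterior mean of the variance; undefined for alpha <= 1.
    assert estimator.alpha > 1., 'posterior mean undefined for alpha <= 1'
    return estimator.beta / (estimator.alpha - 1.)
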
# Just a little test:
mu_star = 10 * np.random.uniform()
var_star = 10 * np.random.uniform()
# Use the same set of samples each time, so we should get the "exact" same
# result each time (except for floating point imprecision).
samples = np.random.normal(mu_star,
                           np.sqrt(var_star),
                           size=10000)
for j in [1, 10, 100]:
    print('For samples of size', j)
    estimator = NGEstimator()
    for x in np.split(samples, int(len(samples) / j)):
        estimator.update(x)
    print('\t', estimator, sep='')
    print('\testimated dist: \tN({}, {})'.format(estimator.mean(),
                                                 estimator.var()))
    print('\ttrue dist: \tN({}, {})'.format(mu_star, var_star))
    print('\tvariance of 100 true samples:',
          np.var(np.random.normal(mu_star, np.sqrt(var_star), size=100)))
    print('\tvariance of 100 estimator samples:',
          np.var(estimator.sample(100)))
    print()
The first revision had a mistake in the second term of the `beta_` calculation, which I've now corrected. I also corrected the third term, which I had modified in an attempt to compensate for the first mistake, and I've made each of the three runs use the exact same samples so that the results can be compared precisely.
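
For reference, the corrected update matches the Normal-Inverse-Gamma posterior from Murphy's note (using the code's names, with $\mu$ and $k$ the incoming mean estimate and sample count, $n$ the batch size, and $\bar{x}$ the batch mean):

$$\beta' = \beta + \frac{1}{2}\sum_{i=1}^{n}(x_i - \bar{x})^2 + \frac{k\,n\,(\bar{x} - \mu)^2}{2(k + n)}$$

The second term is the one the revision fixed; the third is the term restored to Murphy's form.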