
@gaebor
Last active May 2, 2019 14:52
Performs Bayesian Model Comparison to find the right mixture of Gaussians

Bayesian Model Comparison

see D. MacKay: Information Theory, Inference, and Learning Algorithms (2003), Chapter 28
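
The score minimized for each candidate k is the Laplace (saddle-point) approximation of the negative log evidence per data point, as discussed in that chapter. With a uniform prior over the admissible parameter box, the quantity returned by train() in the code below is

-\frac{1}{n}\log P(D \mid \mathcal{H}_k) \;\approx\; \underbrace{-\frac{1}{n}\sum_{i=1}^{n}\log p(x_i \mid \theta^*)}_{\texttt{optimal\_value}} \;+\; \frac{\log V_k}{n} \;+\; \frac{1}{2n}\log\det H \;+\; \frac{d}{2n}\log\frac{n}{2\pi}

where V_k is the volume of the prior parameter box (model_volume in the code), H is the Hessian of the per-point negative log likelihood at the optimum \theta^*, and d = 3k-1 is the number of free parameters of a k-component mixture.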

Requirements

  • python3
  • numpy
  • theano for gradient descent
  • matplotlib for exporting the images
  • sklearn used only for its numerically stable softmax implementation (see the sketch after this list)
  • thextensions from here for calculating a Hessian
    • pip install -U git+https://github.com/gaebor/thextensions.git
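
For orientation, a numerically stable softmax just subtracts the row maximum before exponentiating. A minimal numpy sketch of what sklearn.utils.extmath.softmax provides (the script uses sklearn's version, not this one):

import numpy

def stable_softmax(z):
    """Row-wise softmax; subtracting the row maximum keeps exp() from overflowing."""
    z = numpy.asarray(z, dtype=float)
    e = numpy.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)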

Model

The true model is a mixture of two Gaussians, a two-humped distribution. The program draws points from this distribution and builds a gradually growing dataset. Several candidate models (with 1 up to k components) are fitted to this data, and as the dataset grows, the right model wins.
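
For concreteness, the true parameters hard-coded in main() are p = (0.5, 0.5), mu = (-1, 1), sigma = (0.5, 0.7). Sampling from such a mixture just means picking a component according to its weight and then drawing from the corresponding normal; a minimal numpy sketch (the script itself uses the xgen iterator defined below):

import numpy

p0 = numpy.array([0.5, 0.5])       # mixture weights
mu0 = numpy.array([-1.0, 1.0])     # component means
sigma0 = numpy.array([0.5, 0.7])   # component standard deviations

def sample(size):
    # choose a component for every point, then draw from that component's normal
    components = numpy.random.choice(len(p0), size=size, p=p0)
    return numpy.random.normal(mu0[components], sigma0[components])

print(sample(5))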

Usage

Fits mixture of Gaussian model to a gradually increasing dataset. You can see
how the models compete. Contact: http://math.bme.hu/~borbely/indexeng

optional arguments:
  -h, --help            show this help message and exit
  -k K                  max number of mixtures (default: 5)
  --min MIN             number of datapoints to begin with (default: 1)
  --add ADD             number of datapoints added when new data arrive
                        (default: 10)
  -i ITER, --iter ITER  number of gradient descent optimization steps per
                        model (default: 1000)
  -e EPOCH, --epoch EPOCH
                        how many times new data arrive (default: 11)
  -r, --random          learns new random models each time (default: False)
  -smin SIGMAMIN, --sigmamin SIGMAMIN
                        minimum standard deviation of the Gaussian (default: 0.1)
  -smax SIGMAMAX, --sigmamax SIGMAMAX
                        maximum standard deviation of the Gaussian (default: 10)
  -mmin MUMIN, --mumin MUMIN
                        minimum mean of the Gaussian (default: -10)
  -mmax MUMAX, --mumax MUMAX
                        maximum mean of the Gaussian (default: 10)
  -l ETA, --eta ETA, --learningrate ETA
                        learning rate (default: 0.1)
  -f FILENAME, --filename FILENAME
                        filename prefix for the output images (default: bmc)
  --format FORMAT       matplotlib export format (default: png)
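
For example, assuming the script is saved as bmc.py (any file name works):

python3 bmc.py -k 4 --min 5 --add 20 -e 10 -f demo

Each epoch produces one image named from the filename prefix, the current number of data points and the chosen format, e.g. demo_n005.png, demo_n025.png, and so on.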
from __future__ import print_function
import numpy
import theano
import theano.tensor as T
from thextensions import *
from sklearn.utils.extmath import softmax
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from itertools import islice
import argparse
import sys

class xgen:
    """Generates random numbers from a mixture-of-Gaussians distribution."""
    def __init__(self, p0, mu0, sigma0):
        # cumulative mixture weights, used for inverse-CDF sampling of the component index
        self.p1 = numpy.array([p0[:i+1].sum() for i in range(len(p0))])
        self.mu0 = mu0
        self.sigma0 = sigma0
    def __iter__(self):
        return self
    def __next__(self):
        # pick the first component whose cumulative weight exceeds a uniform draw
        i = numpy.where(self.p1 >= numpy.random.rand())[0][0]
        return numpy.random.normal(self.mu0[i], self.sigma0[i])

def normal_function(x, p, mu, sigma, library=numpy):
    """Mixture-of-Gaussians density of x; works with numpy arrays or theano tensors."""
    return p.dot(library.exp((-(x[None, :] - mu[:, None])**2.0) / (2.0*(sigma**2.0)[:, None]))
                 / (numpy.sqrt(2.0*numpy.pi)*sigma[:, None]))

def plot_data(data, true_parameters, models, args):
    """Plots the data, the true density and every finite candidate model,
    with opacity proportional to the model's posterior probability."""
    n = len(data)
    active_models = [i for i in models.keys() if numpy.isfinite(models[i][0])]
    if len(active_models) == 0:
        return  # there is nothing to plot :(
    objectives = numpy.array([models[i][0] for i in active_models])
    model_probs = softmax(-numpy.log(n)*objectives.reshape((1, -1)))[0]
    best_model = active_models[numpy.argmin(objectives)]
    info = "n=%d, k=%d" % (n, best_model)
    print(info, flush=True)
    fig = plt.figure()
    fig.suptitle(info)
    # actual data points
    plt.scatter(data, numpy.zeros_like(data), color='g', marker='.')
    # grid of x values for plotting the densities
    x = numpy.linspace(args.mumin, args.mumax, 100)
    plt.plot(x, normal_function(x, *true_parameters), color='green')
    for i in range(len(active_models)):
        plt.plot(x, normal_function(x, *(models[active_models[i]][1:])),
                 color='blue', alpha=model_probs[i])
    plt.gca().set_ylim([0, 1])
    fig.savefig(args.format_str % n,
                dpi=300, transparent=True, bbox_inches='tight', pad_inches=0)
    plt.close(fig)

def main(args):
    max_n = args.min + args.epoch * args.add
    # filename pattern: prefix + zero-padded number of data points + extension
    args.format_str = args.filename
    args.format_str += "_n%%0%dd" % numpy.ceil(numpy.log10(max_n + 1))
    args.format_str += "." + args.format
    # The true model
    p0 = numpy.array([0.5, 0.5]).astype(float)
    mu0 = numpy.array([-1, 1]).astype(float)
    sigma0 = numpy.array([0.5, 0.7]).astype(float)
    true_parameters = (p0, mu0, sigma0)
    # Theano
    print("Compiling functions ...", file=sys.stderr, end=" ", flush=True)
    # constraints: mixture weights via softmax, means and sigmas clipped to the allowed box
    p_ = T.vector('p')
    p__ = T.exp(p_)
    p = p__/p__.sum()
    sigma_ = T.vector('sigma')
    mu_ = T.vector('mu')
    mu = T.clip(mu_, args.mumin, args.mumax)
    sigma = T.clip(sigma_, args.sigmamin, args.sigmamax)
    x_shared = theano.shared(numpy.zeros(1))  # let's start with a dummy data point
    # likelihood
    probs = normal_function(x_shared, p, mu, sigma, T)
    # the so-called objective function: mean negative log likelihood
    f = -T.log(probs).mean()
    opt = GradientDescentOptimizer(f, p_, mu_, sigma_, eta=args.eta,
                                   constraints={p_: p, mu_: mu, sigma_: sigma})
    # here a dummy initialization is also fine
    opt.init(p0, mu0, sigma0)
    print("4", file=sys.stderr, end=" ", flush=True)
    getter_f = opt.get_vars()
    print("3", file=sys.stderr, end=" ", flush=True)
    eval_f = theano.function([], f, givens=opt.givens())
    print("2", file=sys.stderr, end=" ", flush=True)
    update_f = theano.function([], f, givens=opt.givens(), updates=opt.updates())
    print("1", file=sys.stderr, end=" ", flush=True)
    hessian_f = theano.function([], Hessian(f, p, mu, sigma), givens=opt.givens())
    print("0", file=sys.stderr, flush=True)

    def train(k, x0, p0=None, mu0=None, sigma0=None, iter=1000):
        """Trains a given model, or initializes a random one, for the given number of steps,
        then returns the Bayesian score together with the fitted parameters."""
        # sends the dataset to theano
        x_shared.set_value(x0)
        keep_sq = p0 is not None or mu0 is not None or sigma0 is not None
        # initial parameters (optional)
        if p0 is None:
            p0 = numpy.random.uniform(low=0, high=1, size=k)
        if mu0 is None:
            mu0 = numpy.random.uniform(low=args.mumin, high=args.mumax, size=k)
        if sigma0 is None:
            low = numpy.log(args.sigmamin)
            high = numpy.log(args.sigmamax)
            # sigma0 = numpy.exp(numpy.random.uniform(low, high, size=k))
            sigma0 = numpy.random.uniform(args.sigmamin, args.sigmamax, size=k)
        # overrides or initializes the parameters for theano
        opt.init(p0, mu0, sigma0)
        for _ in range(iter):
            optimal_value = update_f()
            if not numpy.isfinite(optimal_value):
                break
        p, mu, sigma = getter_f()
        optimal_value = eval_f()
        # log of the prior volume of the parameter box
        # (the simplex of k mixture weights has volume sqrt(k)/(k-1)!)
        model_volume = k*numpy.log((args.mumax - args.mumin)*(args.sigmamax - args.sigmamin)) + \
            numpy.log(numpy.sqrt(k)/numpy.math.factorial(k-1))
        # log-determinant of the Hessian of the per-point negative log likelihood
        hessian = numpy.linalg.slogdet(hessian_f())[1]
        # number of data points
        n = len(x0)
        # number of free parameters: k-1 weights plus k means and k sigmas
        d = k-1 + k*2
        # Laplace approximation of the negative log evidence, per data point
        total_objective = model_volume/n + optimal_value + 0.5*hessian/n + (d/(2.0*n))*numpy.log(n/(2.0*numpy.pi))
        return total_objective, p, mu, sigma

    data_generator = xgen(*true_parameters)
    x0 = list(islice(data_generator, args.min))
    # Let's start
    K = range(1, args.k + 1)
    # train the initial models
    params = {k: train(k, numpy.array(x0), iter=args.iter) for k in K}
    plot_data(x0, true_parameters, params, args)
    for _ in range(args.epoch):
        for k in K:
            # retrain from scratch if requested or if the previous fit diverged,
            # otherwise continue from the previous parameters
            if k not in params or (not numpy.isfinite(params[k][0])) or args.random:
                params[k] = train(k, numpy.array(x0), iter=args.iter)
            else:
                params[k] = train(k, numpy.array(x0), *(params[k][1:]),
                                  iter=args.iter)
        plot_data(x0, true_parameters, params, args)
        for _ in range(args.add):
            x0.append(next(data_generator))
    return 0

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Fits mixture of Gaussian model to a gradually increasing dataset. "
                    "You can see how the models compete. "
                    "Contact: http://math.bme.hu/~borbely/indexeng",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-k', dest='k', type=int, default=5,
                        help='max number of mixtures')
    parser.add_argument('--min', dest='min', type=int,
                        help='number of datapoints to begin with', default=1)
    parser.add_argument('--add', dest='add', type=int, default=10,
                        help='number of datapoints added when new data arrive')
    parser.add_argument('-i', "--iter", dest='iter', type=int, default=1000,
                        help='number of gradient descent optimization steps per model')
    parser.add_argument('-e', "--epoch", dest='epoch', type=int, default=11,
                        help='how many times new data arrive')
    parser.add_argument('-r', "--random", dest='random',
                        help='learns new random models each time',
                        action='store_true', default=False)
    parser.add_argument('-smin', "--sigmamin", dest='sigmamin', type=float,
                        help='minimum standard deviation of the Gaussian', default=0.1)
    parser.add_argument('-smax', "--sigmamax", dest='sigmamax', type=float,
                        help='maximum standard deviation of the Gaussian', default=10)
    parser.add_argument('-mmin', "--mumin", dest='mumin', type=float,
                        help='minimum mean of the Gaussian', default=-10)
    parser.add_argument('-mmax', "--mumax", dest='mumax', type=float,
                        help='maximum mean of the Gaussian', default=10)
    parser.add_argument("-l", "--eta", "--learningrate", dest='eta',
                        type=float, default=0.1, help='learning rate')
    parser.add_argument("-f", "--filename", dest='filename',
                        type=str, default="bmc",
                        help='filename prefix for the output images')
    parser.add_argument("--format", dest='format',
                        type=str, default="png",
                        help='matplotlib export format')
    exit(main(parser.parse_args()))