from __future__ import print_function

import numpy
import theano
import theano.tensor as T
# local helper module providing GradientDescentOptimizer and Hessian
from thextensions import *

from sklearn.utils.extmath import softmax
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from itertools import islice
import argparse
import math
import sys

class xgen:
    """Generates random numbers from a mixture of Gaussians."""
    def __init__(self, p0, mu0, sigma0):
        # cumulative mixture weights, used for inverse transform sampling
        self.p1 = numpy.array([p0[:i+1].sum() for i in range(len(p0))])
        self.mu0 = mu0
        self.sigma0 = sigma0
    def __iter__(self):
        return self
    def __next__(self):
        # pick a component according to the mixture weights,
        # then draw from the chosen Gaussian
        i = numpy.where(self.p1 >= numpy.random.rand())[0][0]
        return numpy.random.normal(self.mu0[i], self.sigma0[i])

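# A minimal usage sketch (the values match the true model set up in main() below):
#   gen = xgen(numpy.array([0.5, 0.5]), numpy.array([-1.0, 1.0]), numpy.array([0.5, 0.7]))
#   sample = list(islice(gen, 100))
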
def normal_function(x, p, mu, sigma, library=numpy):
    """Evaluates the mixture density at the points x.

    `library` is either numpy (for plotting) or theano.tensor (for
    building the symbolic likelihood); both expose a compatible exp()."""
    return p.dot(library.exp((-(x[None, :] - mu[:, None])**2.0)
                             / (2.0*(sigma**2.0)[:, None]))
                 / (numpy.sqrt(2.0*numpy.pi)*sigma[:, None]))

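# For reference, normal_function evaluates
#   f(x) = sum_i p_i * exp(-(x - mu_i)^2 / (2*sigma_i^2)) / (sqrt(2*pi) * sigma_i)
# i.e. the density of the Gaussian mixture.
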
def plot_data(data, true_parameters, models, args):
    n = len(data)
    active_models = [i for i in models.keys() if numpy.isfinite(models[i][0])]

    if len(active_models) == 0:
        return  # there is nothing to plot :(

    objectives = numpy.array([models[i][0] for i in active_models])
    # turn the objectives into model weights for visualization:
    # exp(-log(n) * objective), normalized over the active models
    model_probs = softmax(-numpy.log(n)*objectives.reshape((1, -1)))[0]

    best_model = active_models[numpy.argmin(objectives)]

    info = "n=%d, k=%d" % (n, best_model)
    print(info, flush=True)

    fig = plt.figure()
    fig.suptitle(info)

    # actual data points
    plt.scatter(data, numpy.zeros_like(data), color='g', marker='.')

    # the true density, evaluated on a grid for visualization
    x = numpy.linspace(args.mumin, args.mumax, 100)
    plt.plot(x, normal_function(x, *true_parameters), color='green')

    # each candidate model, drawn with opacity proportional to its weight
    for i in range(len(active_models)):
        plt.plot(x, normal_function(x, *(models[active_models[i]][1:])),
                 color='blue', alpha=model_probs[i])

    plt.gca().set_ylim([0, 1])

    fig.savefig(args.format_str % n,
                dpi=300, transparent=True, bbox_inches='tight', pad_inches=0)
    plt.close(fig)

def main(args):
    max_n = args.min + args.epoch * args.add
    # e.g. "bmc" becomes "bmc_n%03d.png" when max_n has three digits
    args.format_str = args.filename
    args.format_str += "_n%%0%dd" % numpy.ceil(numpy.log10(max_n + 1))
    args.format_str += "." + args.format

    # The true model
    p0 = numpy.array([0.5, 0.5]).astype(float)
    mu0 = numpy.array([-1, 1]).astype(float)
    sigma0 = numpy.array([0.5, 0.7]).astype(float)
    true_parameters = (p0, mu0, sigma0)

    # Theano
    print("Compiling functions ...", file=sys.stderr, end=" ", flush=True)

    # constraints: the mixture weights are parametrized through a softmax,
    # so they stay positive and sum to one
    p_ = T.vector('p')
    p__ = T.exp(p_)
    p = p__/p__.sum()

    sigma_ = T.vector('sigma')
    mu_ = T.vector('mu')

    # keep the means and standard deviations inside the allowed box
    mu = T.clip(mu_, args.mumin, args.mumax)
    sigma = T.clip(sigma_, args.sigmamin, args.sigmamax)

    x_shared = theano.shared(numpy.zeros(1))  # let's start with a dummy data point

    # likelihood
    probs = normal_function(x_shared, p, mu, sigma, T)
    # the so-called _objective function_: mean negative log-likelihood
    f = -T.log(probs).mean()

    opt = GradientDescentOptimizer(f, p_, mu_, sigma_, eta=args.eta,
                                   constraints={p_: p, mu_: mu, sigma_: sigma})

    # here a dummy initialization is also fine
    opt.init(p0, mu0, sigma0)

    # countdown while the Theano functions compile
    print("4", file=sys.stderr, end=" ", flush=True)
    getter_f = opt.get_vars()
    print("3", file=sys.stderr, end=" ", flush=True)
    eval_f = theano.function([], f, givens=opt.givens())
    print("2", file=sys.stderr, end=" ", flush=True)
    update_f = theano.function([], f, givens=opt.givens(), updates=opt.updates())
    print("1", file=sys.stderr, end=" ", flush=True)
    hessian_f = theano.function([], Hessian(f, p, mu, sigma), givens=opt.givens())
    print("0", file=sys.stderr, flush=True)

    def train(k, x0, p0=None, mu0=None, sigma0=None, iter=1000):
        """Trains a given model, or initializes a random one, for `iter` steps."""
        # send the dataset to theano
        x_shared.set_value(x0)

        # initial parameters (optional)
        if p0 is None:
            p0 = numpy.random.uniform(low=0, high=1, size=k)
        if mu0 is None:
            mu0 = numpy.random.uniform(low=args.mumin, high=args.mumax, size=k)
        if sigma0 is None:
            # a log-uniform alternative would be:
            # sigma0 = numpy.exp(numpy.random.uniform(
            #     numpy.log(args.sigmamin), numpy.log(args.sigmamax), size=k))
            sigma0 = numpy.random.uniform(args.sigmamin, args.sigmamax, size=k)
        # overrides or initializes the parameters for theano
        opt.init(p0, mu0, sigma0)

        for _ in range(iter):
            optimal_value = update_f()
            if not numpy.isfinite(optimal_value):
                break
        p, mu, sigma = getter_f()

        optimal_value = eval_f()

        # log-volume of the parameter space: a box for the means and standard
        # deviations, plus the probability simplex (volume sqrt(k)/(k-1)!)
        # for the mixture weights
        model_volume = k*numpy.log((args.mumax - args.mumin)*(args.sigmamax - args.sigmamin)) + \
                       numpy.log(numpy.sqrt(k)/math.factorial(k-1))

        # log-determinant of the Hessian at the optimum
        hessian = numpy.linalg.slogdet(hessian_f())[1]

        # number of data points
        n = len(x0)

        # number of parameters
        d = k-1 + k*2
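
        # The total objective is, up to the 1/n normalization, the Laplace
        # approximation of the negative log marginal likelihood under a
        # uniform prior over the parameter box:
        #   -log Z ~ log(V) + n*f(theta*) + 0.5*log|H_f| + (d/2)*log(n/(2*pi))
        # Dividing by n keeps models trained on different sample sizes comparable.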
        total_objective = model_volume/n + optimal_value + 0.5*hessian/n + \
                          (d/(2.0*n))*numpy.log(n/(2.0*numpy.pi))

        return total_objective, p, mu, sigma

    data_generator = xgen(*true_parameters)
    x0 = list(islice(data_generator, args.min))

    # Let's start

    K = range(1, args.k + 1)
    # train the initial models
    params = {k: train(k, numpy.array(x0), iter=args.iter) for k in K}
    plot_data(x0, true_parameters, params, args)

    for _ in range(args.epoch):
        for k in K:
            if k not in params or (not numpy.isfinite(params[k][0])) or args.random:
                # (re)train from a random initialization
                params[k] = train(k, numpy.array(x0), iter=args.iter)
            else:
                # continue training from the previous parameters
                params[k] = train(k, numpy.array(x0), *(params[k][1:]),
                                  iter=args.iter)
        plot_data(x0, true_parameters, params, args)
        for _ in range(args.add):
            x0.append(next(data_generator))
    return 0

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Fits a mixture of Gaussians model to a gradually growing dataset. "
                    "You can watch how the models compete. "
                    "Contact: http://math.bme.hu/~borbely/indexeng",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('-k', dest='k', type=int, default=5,
                        help='maximum number of mixture components')

    parser.add_argument('--min', dest='min', type=int,
                        help='number of data points to begin with', default=1)

    parser.add_argument('--add', dest='add', type=int, default=10,
                        help='number of data points added whenever new data arrive')

    parser.add_argument('-i', "--iter", dest='iter', type=int, default=1000,
                        help='number of gradient descent optimization steps per model')

    parser.add_argument('-e', "--epoch", dest='epoch', type=int, default=11,
                        help='number of times new data arrive')

    parser.add_argument('-r', "--random", dest='random',
                        help='learn new random models each time',
                        action='store_true', default=False)

    parser.add_argument('-smin', "--sigmamin", dest='sigmamin', type=float,
                        help='minimum standard deviation of the Gaussians', default=0.1)

    parser.add_argument('-smax', "--sigmamax", dest='sigmamax', type=float,
                        help='maximum standard deviation of the Gaussians', default=10)

    parser.add_argument('-mmin', "--mumin", dest='mumin', type=float,
                        help='minimum mean of the Gaussians', default=-10)

    parser.add_argument('-mmax', "--mumax", dest='mumax', type=float,
                        help='maximum mean of the Gaussians', default=10)

    parser.add_argument("-l", "--eta", "--learningrate", dest='eta',
                        type=float, default=0.1, help='learning rate')

    parser.add_argument("-f", "--filename", dest='filename',
                        type=str, default="bmc",
                        help='filename prefix for the output images')

    parser.add_argument("--format", dest='format',
                        type=str, default="png",
                        help='matplotlib export format')

    sys.exit(main(parser.parse_args()))
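
# Example (assuming this script is saved as bmc.py):
#   python bmc.py -k 5 --min 10 --add 20 --epoch 10
# writes a sequence of images bmc_n010.png, bmc_n030.png, ..., one per epoch.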