juancamilog/gist:af3f15bb719e143fe731

## gistfile1.txt
import theano
import theano.tensor as T
from theano.sandbox.linalg import psd,matrix_inverse,det,cholesky
import numpy as np
from time import time

def SEard(loghyp,X):
    ''' Squared exponential kernel with diagonal scaling matrix (one lengthscale per dimension)'''
    N,idims = X.shape

    M = T.diag(T.exp(-2*loghyp[:idims]))
    XM = X.dot(M);
    sumXsq =  T.sum(XM*X,1)
    D = sumXsq[:,None] + sumXsq[None,:] - 2*XM.dot(X.T)

    K = T.exp(2*loghyp[idims] - 0.5*D) + T.eye(N)*T.exp(2*loghyp[idims+1])
    return K


def nlml_(X,Y, loghyps):
    N = X.shape[0]
    K = SEard(loghyps,X)
    iK = matrix_inverse(psd(K))
    beta = iK.dot(Y)
    return 0.5*(Y.T.dot(beta) + T.log(det(psd(K))) + N*np.log(2*np.pi))

def nlml_chol_(X,Y, loghyps):
    N = X.shape[0]
    K = SEard(loghyps,X)
    cholK = cholesky(K)
    Yc = matrix_inverse(cholK).dot(Y) # this should get optimized into a call to solve. Didn't call solve directly since it does not provide a gradient
    return 0.5*(Yc.T.dot(Yc) + 2*T.sum(T.log(T.diag(cholK))) + N*np.log(2*np.pi))

# generate some test data
np.set_printoptions(linewidth=200)
n_samples = 1000
idims = 4
def f(X):
    return np.sin(X.sum(1))*np.cos(X.sum(1))*np.exp(-0.5*np.sum(X**2,1))
Xd = np.random.multivariate_normal(np.zeros((idims,)),10*np.eye(idims),n_samples)
Yd = f(Xd)

# set some initial hyper parameters
lh = np.zeros((idims+2,))
lh[:idims] = np.var(Xd,0,ddof=1)
lh[idims] = np.std(Yd,ddof=1)
lh[idims+1] = 0.1*np.std(Yd,ddof=1)
lh = np.log(lh)

X = theano.shared(Xd)
Y = theano.shared(Yd)
loghyp1 = theano.shared(lh, borrow=False)
loghyp2 = theano.shared(lh, borrow=False)

nlml =  nlml_(X,Y,loghyp1)
dnlml = T.grad(nlml,loghyp1)

nlml_chol =  nlml_chol_(X,Y,loghyp2)
dnlml_chol = T.grad(nlml_chol,loghyp2)

f1 = theano.function([],[nlml,dnlml])
f2 = theano.function([],[nlml_chol,dnlml_chol])


t_nlml_ = 0
t_nlml_chol_ = 0
n_tests = 10
for i in xrange(n_tests):
    print '--- %d ---'%(i)
    # test nlml
    start = time()
    ret = f1()
    t_nlml = time()-start
    loghyp1.set_value(loghyp1.get_value()-1e-5*ret[1])
    print ret,'Elapsed: %f'%(t_nlml)

    t_nlml_ += t_nlml

    # test nlml_chol
    start = time()
    ret = f2()
    t_nlml_chol = time()-start
    loghyp2.set_value(loghyp2.get_value()-1e-5*ret[1])
    print ret,'Elapsed: %f'%(t_nlml_chol)

    t_nlml_chol_ += t_nlml_chol

print 'Average for nlml using matrix inverse: %f'%(t_nlml_/n_tests)
print 'Average for nlml using cholesky: %f'%(t_nlml_chol_/n_tests)
	import theano
	import theano.tensor as T
	from theano.sandbox.linalg import psd,matrix_inverse,det,cholesky
	import numpy as np
	from time import time

	def SEard(loghyp,X):
	''' Squared exponential kernel with diagonal scaling matrix (one lengthscale per dimension)'''
	N,idims = X.shape

	M = T.diag(T.exp(-2*loghyp[:idims]))
	XM = X.dot(M);
	sumXsq = T.sum(XM*X,1)
	D = sumXsq[:,None] + sumXsq[None,:] - 2*XM.dot(X.T)

	K = T.exp(2loghyp[idims] - 0.5D) + T.eye(N)T.exp(2loghyp[idims+1])
	return K


	def nlml_(X,Y, loghyps):
	N = X.shape[0]
	K = SEard(loghyps,X)
	iK = matrix_inverse(psd(K))
	beta = iK.dot(Y)
	return 0.5(Y.T.dot(beta) + T.log(det(psd(K))) + Nnp.log(2*np.pi))

	def nlml_chol_(X,Y, loghyps):
	N = X.shape[0]
	K = SEard(loghyps,X)
	cholK = cholesky(K)
	Yc = matrix_inverse(cholK).dot(Y) # this should get optimized into a call to solve. Didn't call solve directly since it does not provide a gradient
	return 0.5(Yc.T.dot(Yc) + 2T.sum(T.log(T.diag(cholK))) + Nnp.log(2np.pi))

	# generate some test data
	np.set_printoptions(linewidth=200)
	n_samples = 1000
	idims = 4
	def f(X):
	return np.sin(X.sum(1))np.cos(X.sum(1))np.exp(-0.5np.sum(X*2,1))
	Xd = np.random.multivariate_normal(np.zeros((idims,)),10*np.eye(idims),n_samples)
	Yd = f(Xd)

	# set some initial hyper parameters
	lh = np.zeros((idims+2,))
	lh[:idims] = np.var(Xd,0,ddof=1)
	lh[idims] = np.std(Yd,ddof=1)
	lh[idims+1] = 0.1*np.std(Yd,ddof=1)
	lh = np.log(lh)

	X = theano.shared(Xd)
	Y = theano.shared(Yd)
	loghyp1 = theano.shared(lh, borrow=False)
	loghyp2 = theano.shared(lh, borrow=False)

	nlml = nlml_(X,Y,loghyp1)
	dnlml = T.grad(nlml,loghyp1)

	nlml_chol = nlml_chol_(X,Y,loghyp2)
	dnlml_chol = T.grad(nlml_chol,loghyp2)

	f1 = theano.function([],[nlml,dnlml])
	f2 = theano.function([],[nlml_chol,dnlml_chol])


	t_nlml_ = 0
	t_nlml_chol_ = 0
	n_tests = 10
	for i in xrange(n_tests):
	print '--- %d ---'%(i)
	# test nlml
	start = time()
	ret = f1()
	t_nlml = time()-start
	loghyp1.set_value(loghyp1.get_value()-1e-5*ret[1])
	print ret,'Elapsed: %f'%(t_nlml)

	t_nlml_ += t_nlml

	# test nlml_chol
	start = time()
	ret = f2()
	t_nlml_chol = time()-start
	loghyp2.set_value(loghyp2.get_value()-1e-5*ret[1])
	print ret,'Elapsed: %f'%(t_nlml_chol)

	t_nlml_chol_ += t_nlml_chol

	print 'Average for nlml using matrix inverse: %f'%(t_nlml_/n_tests)
	print 'Average for nlml using cholesky: %f'%(t_nlml_chol_/n_tests)