@mike-lawrence
Created December 20, 2016 20:13
Example application of Gaussian Process modelling: fit a GP with a squared-exponential covariance (gp_fit.stan, below) to the HadCRUT4 annual global temperature series using rstan, then plot the posterior median and 95% credible band of the latent function over the raw data.
#load packages
library("curl")
library("ggplot2")
library("rstan")
#retrieve the data
tmpf = tempfile()
curl_download("http://www.metoffice.gov.uk/hadobs/hadcrut4/data/current/time_series/HadCRUT.4.5.0.0.annual_ns_avg.txt", tmpf)
gtemp = read.table(tmpf, colClasses = rep("numeric", 12))[, 1:2] # keep only the first two columns: year & median anomaly
names(gtemp) = c("Year", "Temperature")
#visualize
p = ggplot() +
  geom_point(
    data = gtemp
    , aes(
      x = Year
      , y = Temperature
    )
  )
print(p)
#scale for default prior specification
x = scale(gtemp$Year)[,1]
y = scale(gtemp$Temperature)[,1]
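#with x & y standardized to mean 0 / sd 1, the weibull(2,1) priors on eta and
# noise in gp_fit.stan should act as weakly-informative defaults on this scale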
#Compile the model
gp_model = stan_model(file="gp_fit.stan")
#sample the model
post = sampling(
  gp_model
  , data = list(
    x = x
    , N = length(x)
    , y = y
    , fudge = 1e-3 #smaller value will be more accurate but take more time
  )
  , iter = 2e2 #2e2 takes about 3mins
  , chains = 4
  , cores = 4
  , seed = 1
)
alarm() #beep to signal that sampling is done
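#optionally, check convergence via rstan's summary() method; Rhat near 1 and
# reasonable n_eff suggest the (short) run above was adequate
conv = summary(post, pars = c('eta','rho','noise'))$summary
print(conv[, c('n_eff','Rhat')])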
#look at summary for the GP parameters
print(
  post
  , pars = c('eta','rho','noise')
  , probs = c(.025,.5,.975)
  , digits = 2
)
#get posterior samples on latent function f
f = extract(
  post
  , pars = 'f'
)[[1]]
#revert to original scale
f = f*sd(gtemp$Temperature) + mean(gtemp$Temperature)
#compute median & 95% credible intervals
f_med = apply(f,2,median)
f_loci = apply(f,2,quantile,probs=.025)
f_hici = apply(f,2,quantile,probs=.975)
#add to plot
p = p + geom_ribbon(
  data = data.frame(
    x = gtemp$Year
    , ymin = f_loci
    , ymax = f_hici
  )
  , mapping = aes(
    x = x
    , ymin = ymin
    , ymax = ymax
  )
  , alpha = .5
  , fill = 'blue'
)
p = p + geom_line(
  data = data.frame(
    x = gtemp$Year
    , y = f_med
  )
  , mapping = aes(
    x = x
    , y = y
  )
  , alpha = .5
  , colour = 'blue'
)
print(p)
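
# gp_fit.stan: the Stan model compiled by stan_model() above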
data {
  # N: num unique x values
  int<lower=1> N ;
  # x: x values
  vector[N] x ;
  # y: data
  vector[N] y ;
  # fudge: a fudge factor to ensure positive-definite covariance matrix
  real<lower=0> fudge ;
}
transformed data {
  # dx: for storing the negative squared pairwise distances between x values
  matrix[N,N] dx ;
  #compute negative squared pairwise distances between x values
  for (i in 1:(N-1)) {
    dx[i,i] = 0 ; #Diagonal
    for (j in (i+1):N) { #off-diagonal
      dx[i,j] = -( x[i] - x[j] )^2 ;
      dx[j,i] = dx[i,j] ; # lower mirrors upper
    }
  }
  dx[N,N] = 0 ; #final diagonal
}
parameters {
  # eta: GP parameter, scale of latent function f
  real<lower=0> eta ;
  # noise: scale of measurement noise of y given f
  real<lower=0> noise ;
  # z: dummy variable to speed up computation of GP
  #  (See "Cholesky Factored and Transformed Implementation" from manual)
  vector[N] z ;
  # q: dummy variable used to imply cauchy(0,1) prior on rho
  #  (See "Reparameterizing the Cauchy" from manual)
  real<lower=0,upper=pi()/2> q ;
}
transformed parameters {
  # rho: GP parameter, x-axis scale of influence between points (wiggliness)
  real<lower=0> rho ;
  # f: latent function values
  vector[N] f ;
  rho = tan(q) ; #implies rho ~ cauchy(0,1), peaked @ zero with heavy tail
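  # q gets an implicit uniform(0,pi/2) prior from its bounds, and the tangent
  #  of a uniform(0,pi/2) variate is standard half-Cauchy, so rho gets a
  #  half-cauchy(0,1) prior without sampling rho directly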
  { #local environment so we don't bother keeping L & covMat
    # L: cholesky decomposition of covariance matrix
    matrix[N,N] L ;
    # covMat: covariance matrix
    matrix[N,N] covMat ;
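    # squared-exponential kernel: covMat[i,j] = eta * exp( dx[i,j] * rho )
    #  = eta * exp( -rho * (x[i]-x[j])^2 ), with eta + fudge on the diagonal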
    for (i in 1:(N-1)) {
      covMat[i,i] = eta + fudge ; #Diagonal
      for (j in (i+1):N) { #off-diagonal
        covMat[i,j] = exp(dx[i,j]*rho)*eta ;
        covMat[j,i] = covMat[i,j] ; # lower mirrors upper
      }
    }
    covMat[N,N] = eta + fudge ; #final diagonal
    L = cholesky_decompose(covMat) ;
    f = L * z ;
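    # since z ~ normal(0,1) in the model block and covMat = L * L', f = L * z
    #  is multivariate normal with mean 0 and covariance covMat (the
    #  non-centered / Cholesky-factored GP parameterization)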
  }
}
model {
  # priors on GP kernel parameters
  eta ~ weibull(2,1) ; #peaked around .7
  # prior on noise
  noise ~ weibull(2,1) ; #peaked around .7
  # assert sampling of z as standard normal
  z ~ normal(0,1) ;
  # assert sampling of each replicate of y as the GP plus noise
  y ~ normal(f,noise) ;
}