khakieconomics/discrete_time_ngarch.R

## discrete_time_ngarch.R
# In this demonstration, I estimate a discrete-time NGARCH model.
# I first simulate the model with known parameters, then try to
# recover the parameters from simulated data. Then I apply the
# model to weekly returns for Google.

# The model seems to be able to recover most of the parameters
# fairly well, with inference for alpha, beta fairly week. gamma
# does not seem to be well identified.

# jim savage, james@lendable.io

library(ggplot2); library(Quandl); library(dplyr); library(rstan)

# Get some data to play with
Goog <- Quandl("YAHOO/GOOG", collapse = "weekly") %>%
  arrange(Date)

# Take log difference
R <- diff(log(Goog$`Adjusted Close`))

# Generate some fake data

# Sample parameters from priors
N <- 500
r <- rnorm(1, 0, 0.01);
h0 <- sqrt(rnorm(1, .005, .005)^2); // no idea!
dW <- matrix(rnorm(N*2, 0, 1), N, 2); // if W is Weiner, one-step increments ~ normal(0, 1)
lambda <- rnorm(1, 0.1, .1);
alpha <- sqrt(rnorm(1, 0, .1)^2); # constrain positive
beta <- -1*sqrt(rnorm(1, 0, .2)^2); # constrain negative
gamma <- rnorm(1, 0, .2);
omega <- rnorm(1, 0, .01);

h <- rep(NA, N)
R3 <- rep(NA, N)
for(t in 1:N) {
  if(t==1){
    h[t] <- (omega + h0)*(1/(1 - (beta + alpha*(1 + gamma^2) - 2*gamma*alpha*dW[t,1] - sqrt(2)*alpha*dW[t,2])));
  } else {
    h[t] <- (omega + h[t-1])*(1/(1 - (beta + alpha*(1 + gamma^2) - 2*gamma*alpha*dW[t,1] - sqrt(2)*alpha*dW[t,2])));
  }
  R3[t] <- rnorm(1, r + lambda * sqrt(h[t]) - .5*h[t], sqrt(h[t]))
}

# Declare a model: you should really do this in
# a stan file
model <- "data{
  int N;
  vector[N] R;
}
parameters {
  real r;
  real lambda;
  matrix[N, 2] dW;
  real gamma;
  // I don't think alpha is sign identified
  real<lower = 0> alpha;
  real omega;
  real<upper = 0> beta;
  real<lower = 0> h0;
}
transformed parameters {

  vector[N] h;
  {
    for(t in 1:N) {
      if(t==1){
        h[t] <- (omega + h0)*(1/(1 - (beta + alpha*(1 + pow(gamma, 2)) - 2*gamma*alpha*dW[t,1] - sqrt(2)*alpha*dW[t,2])));
      } else {
        h[t] <- (omega + h[t-1])*(1/(1 - (beta + alpha*(1 + pow(gamma, 2)) - 2*gamma*alpha*dW[t,1] - sqrt(2)*alpha*dW[t,2])));
      }
  }


  }
}
model {
  // priors
  r ~ normal(0, 0.01);
  h0 ~ normal(0.05, 0.05); // no idea!
  to_vector(dW) ~ normal(0, 1); // if W is Weiner, one-step increments ~ normal(0, 1)
  lambda ~ normal(0.1, .1);
  alpha ~ normal(0, 1);
  beta ~ normal(0, 1);
  gamma ~ normal(0, .2);
  omega ~ normal(0, .01);

  // measurement model
  for(t in 1:N){
    R[t] ~ normal(r + lambda * sqrt(h[t]) - .5*h[t], sqrt(h[t]));
  }


}"

# Compile your model
compiled_model <- stan_model(model_code = model)
# Sample from the model (takes 5 mins or so on my computer)
test_hmc <- sampling(compiled_model, data = list(R = R3, N = N), cores = 4, iter = 2000)
print(test_hmc, pars = c("alpha", "beta", "omega", "r", "lambda", "gamma"))
print(c(alpha, beta, omega, r, lambda, gamma))

pars <- extract(test_hmc, permuted = F) %>% plyr::adply(2) %>% dplyr::select(contains("h["))
# Get 95% credibility interval
par_interval <- do.call(bind_rows, apply(pars, 2, function(x) data_frame(median = median(sqrt(x))))) %>%
  mutate(real_h = h)
# Plot the estimated and known volatility measures
plot(par_interval$median, par_interval$real_h)


# Now run the model on Google
google_model <- sampling(compiled_model, data = list(R = R, N = length(N)), cores = 4, iter = 2000)

# Grab the parameters you want
pars <- extract(google_model, permuted = F) %>% plyr::adply(2) %>% dplyr::select(contains("h["))
# Get 95% credibility interval
par_interval <- do.call(bind_rows, apply(pars, 2, function(x) data_frame(median = median(sqrt(x)),
                                                                         lower = quantile(sqrt(x), 0.025),
                                                                         upper = quantile(sqrt(x), 0.975)))) %>%
  mutate(time = Goog$Date[-1])

# Plot the volatility
par_interval %>%
  filter(time > "2005-06-01") %>%
  ggplot(aes(x = time)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "orange", alpha = 0.3) +
  geom_line(aes(y = median)) +
  ylab("Volatility") +
  xlab("Date")
	# In this demonstration, I estimate a discrete-time NGARCH model.
	# I first simulate the model with known parameters, then try to
	# recover the parameters from simulated data. Then I apply the
	# model to weekly returns for Google.

	# The model seems to be able to recover most of the parameters
	# fairly well, with inference for alpha, beta fairly week. gamma
	# does not seem to be well identified.

	# jim savage, james@lendable.io

	library(ggplot2); library(Quandl); library(dplyr); library(rstan)

	# Get some data to play with
	Goog <- Quandl("YAHOO/GOOG", collapse = "weekly") %>%
	arrange(Date)

	# Take log difference
	R <- diff(log(Goog$`Adjusted Close`))

	# Generate some fake data

	# Sample parameters from priors
	N <- 500
	r <- rnorm(1, 0, 0.01);
	h0 <- sqrt(rnorm(1, .005, .005)^2); // no idea!
	dW <- matrix(rnorm(N*2, 0, 1), N, 2); // if W is Weiner, one-step increments ~ normal(0, 1)
	lambda <- rnorm(1, 0.1, .1);
	alpha <- sqrt(rnorm(1, 0, .1)^2); # constrain positive
	beta <- -1*sqrt(rnorm(1, 0, .2)^2); # constrain negative
	gamma <- rnorm(1, 0, .2);
	omega <- rnorm(1, 0, .01);

	h <- rep(NA, N)
	R3 <- rep(NA, N)
	for(t in 1:N) {
	if(t==1){
	h[t] <- (omega + h0)(1/(1 - (beta + alpha(1 + gamma^2) - 2gammaalphadW[t,1] - sqrt(2)alpha*dW[t,2])));
	} else {
	h[t] <- (omega + h[t-1])(1/(1 - (beta + alpha(1 + gamma^2) - 2gammaalphadW[t,1] - sqrt(2)alpha*dW[t,2])));
	}
	R3[t] <- rnorm(1, r + lambda * sqrt(h[t]) - .5*h[t], sqrt(h[t]))
	}

	# Declare a model: you should really do this in
	# a stan file
	model <- "data{
	int N;
	vector[N] R;
	}
	parameters {
	real r;
	real lambda;
	matrix[N, 2] dW;
	real gamma;
	// I don't think alpha is sign identified
	real<lower = 0> alpha;
	real omega;
	real<upper = 0> beta;
	real<lower = 0> h0;
	}
	transformed parameters {

	vector[N] h;
	{
	for(t in 1:N) {
	if(t==1){
	h[t] <- (omega + h0)(1/(1 - (beta + alpha(1 + pow(gamma, 2)) - 2gammaalphadW[t,1] - sqrt(2)alpha*dW[t,2])));
	} else {
	h[t] <- (omega + h[t-1])(1/(1 - (beta + alpha(1 + pow(gamma, 2)) - 2gammaalphadW[t,1] - sqrt(2)alpha*dW[t,2])));
	}
	}


	}
	}
	model {
	// priors
	r ~ normal(0, 0.01);
	h0 ~ normal(0.05, 0.05); // no idea!
	to_vector(dW) ~ normal(0, 1); // if W is Weiner, one-step increments ~ normal(0, 1)
	lambda ~ normal(0.1, .1);
	alpha ~ normal(0, 1);
	beta ~ normal(0, 1);
	gamma ~ normal(0, .2);
	omega ~ normal(0, .01);

	// measurement model
	for(t in 1:N){
	R[t] ~ normal(r + lambda * sqrt(h[t]) - .5*h[t], sqrt(h[t]));
	}


	}"

	# Compile your model
	compiled_model <- stan_model(model_code = model)
	# Sample from the model (takes 5 mins or so on my computer)
	test_hmc <- sampling(compiled_model, data = list(R = R3, N = N), cores = 4, iter = 2000)
	print(test_hmc, pars = c("alpha", "beta", "omega", "r", "lambda", "gamma"))
	print(c(alpha, beta, omega, r, lambda, gamma))

	pars <- extract(test_hmc, permuted = F) %>% plyr::adply(2) %>% dplyr::select(contains("h["))
	# Get 95% credibility interval
	par_interval <- do.call(bind_rows, apply(pars, 2, function(x) data_frame(median = median(sqrt(x))))) %>%
	mutate(real_h = h)
	# Plot the estimated and known volatility measures
	plot(par_interval$median, par_interval$real_h)



	# Now run the model on Google
	google_model <- sampling(compiled_model, data = list(R = R, N = length(N)), cores = 4, iter = 2000)

	# Grab the parameters you want
	pars <- extract(google_model, permuted = F) %>% plyr::adply(2) %>% dplyr::select(contains("h["))
	# Get 95% credibility interval
	par_interval <- do.call(bind_rows, apply(pars, 2, function(x) data_frame(median = median(sqrt(x)),
	lower = quantile(sqrt(x), 0.025),
	upper = quantile(sqrt(x), 0.975)))) %>%
	mutate(time = Goog$Date[-1])

	# Plot the volatility
	par_interval %>%
	filter(time > "2005-06-01") %>%
	ggplot(aes(x = time)) +
	geom_ribbon(aes(ymin = lower, ymax = upper), fill = "orange", alpha = 0.3) +
	geom_line(aes(y = median)) +
	ylab("Volatility") +
	xlab("Date")