Skip to content

Instantly share code, notes, and snippets.

@tnagler
Last active September 25, 2019 14:30
Show Gist options
  • Save tnagler/73044bb254ff51f388fb67bbe5b8f28c to your computer and use it in GitHub Desktop.
Save tnagler/73044bb254ff51f388fb67bbe5b8f28c to your computer and use it in GitHub Desktop.
Appendix to "Generalized Additive Models for Pair-Copula Constructions": Code for the intraday FX application
title subtitle author date output
Appendix to "Generalized Additive Models for Pair-Copula Constructions"
Code for the intraday FX application
Thibault Vatter and Thomas Nagler
15 August, 2017
github_document

Preliminary definitions

Required packages

pack <- c('zoo','xts','pryr', 'plyr', 'dplyr','rugarch', 
          'gamCopula', 'parallel', 'doParallel')
for (p in pack) {
	library(p, character.only = TRUE)
}

Tools for data preparation

loadData <- function(file) {
  data <- read.csv(file, sep = ",")
  data$Time <- as.POSIXct(strptime(data$Time, format="%Y.%m.%e %H:%M:%S", tz = "CET"))
  data <- as.xts(data[,2:6], order.by=data$Time)
} 

get.return <- function(ask, bid){
  p <- (log(ask$Close)+log(bid$Close))/2
  return(diff(p)[-1])
}

get.price <- function(ask, bid){
  
  p <- (log(ask$Close)+log(bid$Close))/2
  return(p)
}

get.FFFXmat <- function(r, K, n, dayofweek = FALSE){
  
  rr <- index(r)
  tt <- c((1:length(r))/n) %% 1 
  d <- as.numeric(format(as.POSIXct(as.numeric(rr)+2*3600, 
                                    origin = "1970-01-01 00:00.00"), "%w"))-1
  
  if(dayofweek == FALSE){
    X <- matrix(0, ncol = 2*K, nrow = length(tt))
  }else{
    X <- matrix(0, ncol = 2*K+4, nrow = length(tt))
  }
  
  for(j in 1:K){
    X[,j] <- cos(2*pi*j*tt)
    X[,j+K] <- sin(2*pi*j*tt)
  }
  
  if(dayofweek == TRUE){
    X <- t(apply(cbind(X,d), 1, function(x){ 
      y <- x[1:(2*K+4)]
      if(x[2*K+4+1]>0){y[2*K+x[2*K+4+1]] <- 1}
      return(y)}))
  }
  
  m <- dim(X)[2]
  colnames(X) <- sapply(1:m, function(x) paste("X",x,sep = ""))
  
  return(X)
}

Fit routine for marginal EGARCH models

fitIntradayEGARCH <- function(r, p = 288) {
  ## External regressors
  X <- get.FFFXmat(r, 5, p)

  ## Exponential GARCH
  spec <- ugarchspec(variance.model = list(model = "eGARCH", 
                                           external.regressors = X),    
                     mean.model = list(include.mean = FALSE))
  
  ## Fit the model and extract the spot volatility
  out <- list(fit=ugarchfit(spec, as.numeric(r)))
  out$vol <- zoo(sigma(out$fit), order = index(r))
  out$res <- zoo(residuals(out$fit), order = index(r))
  
  return(out)
}

Helper function for bootstrapping

make_sim_model <- function(i, GVC, newdata) {
  set.seed(i)
  # Simulates from the model
  u.i <- data.frame(gamVineSimulate(GVC = GVC, newdata = newdata), newdata)
  # Re-estimates the parameters
  fit.i <- try(gamVineSeqFit(u.i, GVC, covariates = names(newdata) ,
                             method = "FS", n.iters = 40))
  # Obtain the new estimates
  val.i <- try(lapply(fit.i@model, function(x)
    gamBiCopPredict(x, newdata = newdata, type = "terms", target = "calib")$calib))
  terms.i <- sapply(val.i, function(y)
    sapply(colnames(y), function(x) length(strsplit(x, "X")[[1]])))
  val.i <- try(lapply(1:length(val.i), function(x)
    list(const = rep(attr(val.i[[x]], "constant"), nrow(newdata)),
         smooth = val.i[[x]][,terms.i[[x]] == 1],
         beta = apply(val.i[[x]][,terms.i[[x]] == 2],1,sum))))
  out <- if (inherits(val.i, "try-error")) NULL else val.i
  out
}

Custom plot functions

plot_margins <-  function(j, data, vol, start, end) {
  tt <- index(data %>% window(., start = start, end = end))
  ix <- which(is.element(tt,seq(min(tt)+3600*3, max(tt), 3600*24)))
  p <- round(60*24/as.numeric(diff(tt[1:2])))
  fmt <- "%a %m-%d"
  options(scipen=0)

  par(mfrow = c(1,3), mar = c(2.5, 2.5, 1.6, 1.1), font.main = 1, cex.main = 1) 
  plot(tt, as.numeric(data[tt,j]), 
       ylab = "", main = "Return", xlab = "Date (days)", 
       ylim = c(-3e-3,3e-3), xaxt = "n", lwd = 1.3, type = "l")
  lines(tt, as.numeric(2*vol[tt,j]), col = "red", lwd = 1.3)
  axis(side = 1, at = tt[ix], labels = format(tt[ix], fmt))
  
  a1 <- apply(abs(data), 2,function(x) 
    acf(x, 5*p, plot = FALSE)$acf[-1])
  a2 <- apply(abs(as.matrix(data)/as.matrix(vol)), 2,function(x) 
    acf(x, 5*p, plot = FALSE)$acf[-1])
  
  plot((1:(5*p))/p, a1[,j], 
       type = "l", main = "Autocorrelation", 
       xlab = "Lag (days)", ylab = "", ylim = c(-0.1,0.3),
       lwd = 1.3)
  lines((1:(5*p))/p, a2[,j], col = "red",
        lwd = 1.3)
  
  s <- apply(vol, 2, function(x) 
    apply(matrix(c(0,x), ncol = p, byrow = T), 2, mean))
  e <- apply(data, 2, function(x) 
    apply(matrix(c(0,x), ncol = p, byrow = T), 2, sd))
  hh <- data.frame(h = as.POSIXlt(t[1:p])$hour, m = as.POSIXlt(t[1:p])$min)
  sel <- order(hh$h,hh$m)
  h <- hh[sel,]$h+hh[sel,]$m/60
  plot(h, as.numeric(e[sel,j]), 
       type = "l", main = "Periodic Component", 
       xlab = "Time of Day (hours)", ylab = "", ylim = c(2e-4,1.5e-3), 
       lwd = 1.3, col = "black", xaxt = "n")
  lines(h, as.numeric(s[sel,j]), col = "red",
        lwd = 1.3)
  axis(1, at = c(0, 4, 8, 12, 16, 20,24), labels = c(0, 4, 8, 12, 16, 20,24))
}

plot_copula <- function(j, tt, GVC, smooth, param, plotempirical) {
  ix <- as.character(unique(cut(tt,'month')))
  ix <- sapply(ix[seq(2,length(ix),2)], function(m)  
    which(cut(tt,'month') == m)[1])
  
  tmp <- zoo(t(param[[j]]), order = tt)
  tau <- tanh((const[[j]][1,] + tmp[, 1] + smooth[[j]][1,])/2)
  tau2 <- tanh((const[[j]][1,] + smooth[[j]][1,])/2)
  
  par(mfrow = c(1,2), mar = c(2.5, 2.5, 1.6, 1.1))
  plot(tt, tau, 
       ylab = "", main = "Kendall's Tau", xlab = "Time (months)", 
       type = "l", ylim = c(-1,1), col = "black")
  lines(tt, tau, lwd = 1.3)
  polygon(c(tt,rev(tt)),c(as.vector(tanh((const[[j]][2,] + smooth[[j]][2,])/2)),
                          rev(as.vector(tanh((const[[j]][3,] + smooth[[j]][3,])/2)))),
          border = NA, col = gray(0.85, 0.8))
  lines(tt, tau2, lwd = 1.3, col = "red")
  
  ts <- c(12:96, 1:11)
  U <- cbind(GVC@model[[j]]@model$data$u1,GVC@model[[j]]@model$data$u2)
  p <- 96
  s <- sapply(ts, function(j) {
    sel <- seq(1,dim(U)[1],p) + (j-1)
    if (j == p) {
      sel <- sel[1:(length(sel)-1)]
    }
    atanh(cor(U[sel,], method = "kendall")[2])*2
  })
  
  plot(1:96, as.vector(tmp[, 1])[ts], ylab = "",
       main = "Periodic Component", xlab = "Time of Day (hours)", xaxt = "n",
       type = "l", ylim = c(-0.5,0.5))
  axis(1, at = c(1, 1 + (1:6) * 96/6), labels = c(0, 4, 8, 12, 16, 20,24))
  polygon(c(1:96, c(rev(12:96), rev(1:11))),
          c(as.vector(tmp[, 2])[ts], rev(as.vector(tmp[, 3])[ts])),
          border = NA, col = gray(0.85, 0.8))
  lines(as.vector(tmp[, 1])[ts], lwd = 1.3)
  if (plotempirical == TRUE) {
    points(1:96,s-mean(s))
  }
  abline(h = 0, lty = 3)
  
  ss <- 0.05
  # london 
  lines(rep(4 * 7, 2), c(0.4, 0.36)+ss, lty = 1)
  lines(rep(4 * 16, 2), c(0.4, 0.36)+ss, lty = 1)
  lines(4 * c(7, 16), rep(0.38, 2)+ss, lty = 1)
  text(4 * mean(c(7, 16)), 0.365+ss, "London", pos = 3)
  
  # new york 
  lines(rep(4 * 13, 2), c(0.32, 0.28)+ss, lty = 1)
  lines(rep(4 * 21, 2), c(0.32, 0.28)+ss, lty = 1)
  lines(4 * c(13, 21), rep(0.30, 2)+ss, lty = 1)
  text(4 * mean(c(13, 21)), 0.285+ss, "New York", pos = 3)
  
  # tokyo 
  lines(rep(4 * 23, 2), c(0.38, 0.34)+ss, lty = 1)
  lines(rep(4 * 7, 2), c(0.38, 0.34)+ss, lty = 1)
  lines(c(0, 4 * 7), rep(0.36, 2)+ss, lty = 1)
  lines(c(4 * 23, 24 * 7), rep(0.36, 2)+ss, lty = 1)
  text(4 * mean(c(-1, 7)), 0.345+ss, "Tokyo", pos = 3)
}

Data analysis

Data preparation

Pairs to be modeled and date range

pair.names <- c("EURUSD", "USDJPY", "GBPUSD", "USDCHF")
start <- "2013.03.10"
end <- "2013.11.02"
pair.files <- sapply(pair.names, function(x)
  paste0("data/",x,"_UTC_15 Mins_",c("Bid","Ask"),"_",start,"_",end,".csv"))

Load data and compute demeaned return

if (!dir.exists("data"))
  untar("data.tar.gz")
data <- do.call(cbind, lapply(apply(pair.files, 2, function(x) {
  a <- loadData(x[1]) # ask
  b <- loadData(x[2]) # bid
  p <- get.price(a,b) # (price = log(ask) + log(bid))/2
  r <- get.return(a,b) # return(t) = price(t) - price(t-1)
  return(list(p = p, r = r - mean(r)))
}), function(x) x$r))
colnames(data) <- pair.names

Get time, number of observations per day and number of observations

t <- index(data) # time index
p <- round(60*24/as.numeric(diff(t[1:2]))) # number of observations per day
n <- length(t) # number of observations

Marginal modeling

Fit intraday egarch models

# Takes a while, so better to save the models and load them next time... 
if (file.exists("data/GAM-PCC-intraday-fx-margins.rda")) {
  load("data/GAM-PCC-intraday-fx-margins.rda")
} else {
  cl <- makeCluster(detectCores()-1)
  registerDoParallel(cl)
  fits_margins <- as.list(numeric(dim(data)[2]))
  time_margins <- system.time(
    fits_margins <- foreach(i = 1:dim(data)[2], 
                    .packages = c("rugarch", "zoo")) %dopar% 
                    {fitIntradayEGARCH(data[,i],p)}
  )[3]
  closeAllConnections()
  save(fits_margins, time_margins, 
       file = "data/GAM-PCC-intraday-fx-margins.rda")
}

Results of marginal modeling

EURUSD

plot_margins(1, data, vol, "2013-03-10","2013-03-17")

plot of chunk marg1

USDJPY

plot_margins(2, data, vol, "2013-03-10","2013-03-17")

plot of chunk marg2

GBPUSD

plot_margins(3, data, vol, "2013-03-10","2013-03-17")

plot of chunk marg3

USDCHF

plot_margins(4, data, vol, "2013-03-10","2013-03-17")

plot of chunk marg4

Dependence modeling

Extract the volatilities, residuals and transform to copula scale

vol <- zoo(do.call(cbind,lapply(fits_margins, function(x) x$vol)), order = t)
res <- as.matrix(data)/as.matrix(vol)
U <- data.frame(apply(res, 2, rank)/(n + 1))
colnames(U) <- pair.names

Get the covariates

absolute_time <- seq(1,n)*24/p
time_of_day <- get.FFFXmat(data[,1], 5, p)

Select and fit the gamVine model

# Takes a while, so better to save the models and load them next time... 
if (file.exists("data/GAM-PCC-intraday-fx-copula.rda")) {
  load("data/GAM-PCC-intraday-fx-copula.rda")
} else {
  time_copula <- system.time(
    fit_copula <- gamVineStructureSelect(U, lin.covs = as.data.frame(time_of_day),
                                smooth.covs = data.frame(t = absolute_time),
                                simplified = TRUE, familycrit = "AIC",
                                method = "FS", n.iters = 40,
                                verbose = FALSE, parallel = TRUE)
  )[3]
  save(fit_copula, time_copula, 
       file = "data/GAM-PCC-intraday-fx-copula.rda")
}
fit_copula
## GAM-Vine matrix: 
##      [,1] [,2] [,3] [,4]
## [1,]    3    0    0    0
## [2,]    2    1    0    0
## [3,]    4    2    2    0
## [4,]    1    4    4    4
## 
##  Where 
## 1  <->  EURUSD 
## 2  <->  USDJPY 
## 3  <->  GBPUSD 
## 4  <->  USDCHF 
## 
##  Tree 1: 
## GBPUSD,EURUSD : t copula with tau(z) = (exp(z)-1)/(exp(z)+1) where 
## z ~ X1 + X2 + X3 + X4 + X5 + X6 + X8 + X9 + s(t, k = 40, bs = "cr")
## EURUSD,USDCHF : t copula with tau(z) = (exp(z)-1)/(exp(z)+1) where 
## z ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + s(t, k = 320, 
##     bs = "cr")
## USDJPY,USDCHF : t copula with tau(z) = (exp(z)-1)/(exp(z)+1) where 
## z ~ X1 + X2 + X3 + X4 + X6 + X7 + X9 + s(t, k = 80, bs = "cr")
## 
##  Tree 2: 
## GBPUSD,USDCHF|EURUSD : t copula with tau(z) = (exp(z)-1)/(exp(z)+1) where 
## z ~ X1 + X3 + X4 + s(t, k = 20, bs = "cr")
## EURUSD,USDJPY|USDCHF : t copula with tau(z) = (exp(z)-1)/(exp(z)+1) where 
## z ~ X1 + X2 + X3 + X4 + X9 + s(t, k = 40, bs = "cr")
## 
##  Tree 3: 
## GBPUSD,USDJPY|USDCHF,EURUSD : t copula with tau(z) = (exp(z)-1)/(exp(z)+1) where 
## z ~ X1 + X2 + X5 + X6 + s(t, k = 40, bs = "cr")

Parametric bootstrap for the confidence intervals

nBoot <- 2e2 # Number of bootstraped simulations
# Takes a while, so better to save the simulations and load them next time... 
if (file.exists("data/GAM-PCC-intraday-fx-bootstrap.rda")) {
  load("data/GAM-PCC-intraday-fx-bootstrap.rda")
} else {
  newdata <- data.frame(t = absolute_time, X <- time_of_day)
  cl <- makeCluster(detectCores()-1)
  registerDoParallel(cl)
  val.bs <- as.list(numeric(nBoot))
  time_bootstrap <- system.time(
    val.bs <- foreach(i = 1:nBoot, .packages = c("gamCopula")) %dopar% 
      {make_sim_model(i, fit_copula, newdata)}
  )[3]
  save(val.bs, time_bootstrap, 
       file = "data/GAM-PCC-intraday-fx-bootstrap.rda")
  closeAllConnections()
}

Results of copula modeling

# Extract mean and confidence intervals for the smooth and parameters
const <- smooth <- param <- list()
for (k in 1:length(val.bs[[1]])) {
  const[[k]] <- apply(sapply(val.bs, function(x) x[[k]][[1]]), 1, 
                      function(y) c(mean(y), quantile(y, c(0.025,0.975))))
  smooth[[k]] <- try(apply(sapply(val.bs, function(x) x[[k]][[2]]), 1, 
                           function(y) c(mean(y), quantile(y, c(0.025,0.975)))))
  param[[k]] <- try(apply(sapply(val.bs, function(x) x[[k]][[3]]), 1, 
                          function(y) c(mean(y), quantile(y, c(0.025,0.975)))))
}

GBPUSD,EURUSD

plot_copula(1, t, fit_copula, smooth, param, FALSE)

plot of chunk pc1

EURUSD,USDCHF

plot_copula(2, t, fit_copula, smooth, param, FALSE)

plot of chunk pc2

USDJPY,USDCHF

plot_copula(3, t, fit_copula, smooth, param, FALSE)

plot of chunk pc3

GBPUSD,USDCHF|EURUSD

plot_copula(4, t, fit_copula, smooth, param, FALSE)

plot of chunk pc4

EURUSD,USDJPY|USDCHF

plot_copula(5, t, fit_copula, smooth, param, FALSE)

plot of chunk pc5

GBPUSD,USDJPY|USDCHF,EURUSD

plot_copula(6, t, fit_copula, smooth, param, FALSE)

plot of chunk pc6

Computing times

computing_times <- data.frame(margins = time_margins, 
                              copula = time_copula,
                              bootstrap = time_bootstrap)/60
knitr::kable(computing_times, digits = 1)
margins copula bootstrap
elapsed 0.9 4 67.2

Environment info

sessionInfo()
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 18.1
## 
## Matrix products: default
## BLAS: /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=de_DE.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=de_DE.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] doParallel_1.0.10 iterators_1.0.8   foreach_1.4.3    
##  [4] gamCopula_0.0-3   rugarch_1.3-6     dplyr_0.7.2      
##  [7] plyr_1.8.4        pryr_0.1.2        xts_0.10-0       
## [10] zoo_1.8-0        
## 
## loaded via a namespace (and not attached):
##  [1] rgl_0.98.1                  Rcpp_0.12.12               
##  [3] ADGofTest_0.3               mvtnorm_1.0-6              
##  [5] lattice_0.20-35             FNN_1.1                    
##  [7] rprojroot_1.2               assertthat_0.2.0           
##  [9] digest_0.6.12               mime_0.5                   
## [11] truncnorm_1.0-7             R6_2.2.2                   
## [13] backports_1.1.0             stats4_3.4.1               
## [15] pcaPP_1.9-72                evaluate_0.10.1            
## [17] highr_0.6                   rlang_0.1.1.9000           
## [19] misc3d_0.8-4                nloptr_1.0.4               
## [21] SkewHyperbolic_0.3-2        Matrix_1.2-10              
## [23] rmarkdown_1.6               stringr_1.2.0              
## [25] htmlwidgets_0.9             igraph_1.1.2               
## [27] shiny_1.0.3                 compiler_3.4.1             
## [29] numDeriv_2016.8-1           httpuv_1.3.5               
## [31] pkgconfig_2.0.1             DistributionUtils_0.5-1    
## [33] gsl_1.9-10.3                mgcv_1.8-18                
## [35] htmltools_0.3.6             Rsolnp_1.16                
## [37] multicool_0.1-10            tibble_1.3.3               
## [39] VineCopula_2.1.2            expm_0.999-2               
## [41] codetools_0.2-15            stabledist_0.7-1           
## [43] pspline_1.0-18              GeneralizedHyperbolic_0.8-1
## [45] MASS_7.3-47                 grid_3.4.1                 
## [47] nlme_3.1-131                jsonlite_1.5               
## [49] xtable_1.8-2                magrittr_1.5               
## [51] KernSmooth_2.23-15          stringi_1.1.5              
## [53] bindrcpp_0.2                spd_2.0-1                  
## [55] tools_3.4.1                 copula_0.999-17            
## [57] glue_1.1.1                  ks_1.10.7                  
## [59] network_1.13.0              yaml_2.1.14                
## [61] knitr_1.16                  bindr_0.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment