title | subtitle | author | date | output |
---|---|---|---|---|
Appendix to "Generalized Additive Models for Pair-Copula Constructions" |
Code for the intraday FX application |
Thibault Vatter and Thomas Nagler |
15 August, 2017 |
github_document |
pack <- c('zoo','xts','pryr', 'plyr', 'dplyr','rugarch',
'gamCopula', 'parallel', 'doParallel')
for (p in pack) {
library(p, character.only = TRUE)
}
loadData <- function(file) {
data <- read.csv(file, sep = ",")
data$Time <- as.POSIXct(strptime(data$Time, format="%Y.%m.%e %H:%M:%S", tz = "CET"))
data <- as.xts(data[,2:6], order.by=data$Time)
}
get.return <- function(ask, bid){
p <- (log(ask$Close)+log(bid$Close))/2
return(diff(p)[-1])
}
get.price <- function(ask, bid){
p <- (log(ask$Close)+log(bid$Close))/2
return(p)
}
get.FFFXmat <- function(r, K, n, dayofweek = FALSE){
rr <- index(r)
tt <- c((1:length(r))/n) %% 1
d <- as.numeric(format(as.POSIXct(as.numeric(rr)+2*3600,
origin = "1970-01-01 00:00.00"), "%w"))-1
if(dayofweek == FALSE){
X <- matrix(0, ncol = 2*K, nrow = length(tt))
}else{
X <- matrix(0, ncol = 2*K+4, nrow = length(tt))
}
for(j in 1:K){
X[,j] <- cos(2*pi*j*tt)
X[,j+K] <- sin(2*pi*j*tt)
}
if(dayofweek == TRUE){
X <- t(apply(cbind(X,d), 1, function(x){
y <- x[1:(2*K+4)]
if(x[2*K+4+1]>0){y[2*K+x[2*K+4+1]] <- 1}
return(y)}))
}
m <- dim(X)[2]
colnames(X) <- sapply(1:m, function(x) paste("X",x,sep = ""))
return(X)
}
fitIntradayEGARCH <- function(r, p = 288) {
## External regressors
X <- get.FFFXmat(r, 5, p)
## Exponential GARCH
spec <- ugarchspec(variance.model = list(model = "eGARCH",
external.regressors = X),
mean.model = list(include.mean = FALSE))
## Fit the model and extract the spot volatility
out <- list(fit=ugarchfit(spec, as.numeric(r)))
out$vol <- zoo(sigma(out$fit), order = index(r))
out$res <- zoo(residuals(out$fit), order = index(r))
return(out)
}
make_sim_model <- function(i, GVC, newdata) {
set.seed(i)
# Simulates from the model
u.i <- data.frame(gamVineSimulate(GVC = GVC, newdata = newdata), newdata)
# Re-estimates the parameters
fit.i <- try(gamVineSeqFit(u.i, GVC, covariates = names(newdata) ,
method = "FS", n.iters = 40))
# Obtain the new estimates
val.i <- try(lapply(fit.i@model, function(x)
gamBiCopPredict(x, newdata = newdata, type = "terms", target = "calib")$calib))
terms.i <- sapply(val.i, function(y)
sapply(colnames(y), function(x) length(strsplit(x, "X")[[1]])))
val.i <- try(lapply(1:length(val.i), function(x)
list(const = rep(attr(val.i[[x]], "constant"), nrow(newdata)),
smooth = val.i[[x]][,terms.i[[x]] == 1],
beta = apply(val.i[[x]][,terms.i[[x]] == 2],1,sum))))
out <- if (inherits(val.i, "try-error")) NULL else val.i
out
}
plot_margins <- function(j, data, vol, start, end) {
tt <- index(data %>% window(., start = start, end = end))
ix <- which(is.element(tt,seq(min(tt)+3600*3, max(tt), 3600*24)))
p <- round(60*24/as.numeric(diff(tt[1:2])))
fmt <- "%a %m-%d"
options(scipen=0)
par(mfrow = c(1,3), mar = c(2.5, 2.5, 1.6, 1.1), font.main = 1, cex.main = 1)
plot(tt, as.numeric(data[tt,j]),
ylab = "", main = "Return", xlab = "Date (days)",
ylim = c(-3e-3,3e-3), xaxt = "n", lwd = 1.3, type = "l")
lines(tt, as.numeric(2*vol[tt,j]), col = "red", lwd = 1.3)
axis(side = 1, at = tt[ix], labels = format(tt[ix], fmt))
a1 <- apply(abs(data), 2,function(x)
acf(x, 5*p, plot = FALSE)$acf[-1])
a2 <- apply(abs(as.matrix(data)/as.matrix(vol)), 2,function(x)
acf(x, 5*p, plot = FALSE)$acf[-1])
plot((1:(5*p))/p, a1[,j],
type = "l", main = "Autocorrelation",
xlab = "Lag (days)", ylab = "", ylim = c(-0.1,0.3),
lwd = 1.3)
lines((1:(5*p))/p, a2[,j], col = "red",
lwd = 1.3)
s <- apply(vol, 2, function(x)
apply(matrix(c(0,x), ncol = p, byrow = T), 2, mean))
e <- apply(data, 2, function(x)
apply(matrix(c(0,x), ncol = p, byrow = T), 2, sd))
hh <- data.frame(h = as.POSIXlt(t[1:p])$hour, m = as.POSIXlt(t[1:p])$min)
sel <- order(hh$h,hh$m)
h <- hh[sel,]$h+hh[sel,]$m/60
plot(h, as.numeric(e[sel,j]),
type = "l", main = "Periodic Component",
xlab = "Time of Day (hours)", ylab = "", ylim = c(2e-4,1.5e-3),
lwd = 1.3, col = "black", xaxt = "n")
lines(h, as.numeric(s[sel,j]), col = "red",
lwd = 1.3)
axis(1, at = c(0, 4, 8, 12, 16, 20,24), labels = c(0, 4, 8, 12, 16, 20,24))
}
plot_copula <- function(j, tt, GVC, smooth, param, plotempirical) {
ix <- as.character(unique(cut(tt,'month')))
ix <- sapply(ix[seq(2,length(ix),2)], function(m)
which(cut(tt,'month') == m)[1])
tmp <- zoo(t(param[[j]]), order = tt)
tau <- tanh((const[[j]][1,] + tmp[, 1] + smooth[[j]][1,])/2)
tau2 <- tanh((const[[j]][1,] + smooth[[j]][1,])/2)
par(mfrow = c(1,2), mar = c(2.5, 2.5, 1.6, 1.1))
plot(tt, tau,
ylab = "", main = "Kendall's Tau", xlab = "Time (months)",
type = "l", ylim = c(-1,1), col = "black")
lines(tt, tau, lwd = 1.3)
polygon(c(tt,rev(tt)),c(as.vector(tanh((const[[j]][2,] + smooth[[j]][2,])/2)),
rev(as.vector(tanh((const[[j]][3,] + smooth[[j]][3,])/2)))),
border = NA, col = gray(0.85, 0.8))
lines(tt, tau2, lwd = 1.3, col = "red")
ts <- c(12:96, 1:11)
U <- cbind(GVC@model[[j]]@model$data$u1,GVC@model[[j]]@model$data$u2)
p <- 96
s <- sapply(ts, function(j) {
sel <- seq(1,dim(U)[1],p) + (j-1)
if (j == p) {
sel <- sel[1:(length(sel)-1)]
}
atanh(cor(U[sel,], method = "kendall")[2])*2
})
plot(1:96, as.vector(tmp[, 1])[ts], ylab = "",
main = "Periodic Component", xlab = "Time of Day (hours)", xaxt = "n",
type = "l", ylim = c(-0.5,0.5))
axis(1, at = c(1, 1 + (1:6) * 96/6), labels = c(0, 4, 8, 12, 16, 20,24))
polygon(c(1:96, c(rev(12:96), rev(1:11))),
c(as.vector(tmp[, 2])[ts], rev(as.vector(tmp[, 3])[ts])),
border = NA, col = gray(0.85, 0.8))
lines(as.vector(tmp[, 1])[ts], lwd = 1.3)
if (plotempirical == TRUE) {
points(1:96,s-mean(s))
}
abline(h = 0, lty = 3)
ss <- 0.05
# london
lines(rep(4 * 7, 2), c(0.4, 0.36)+ss, lty = 1)
lines(rep(4 * 16, 2), c(0.4, 0.36)+ss, lty = 1)
lines(4 * c(7, 16), rep(0.38, 2)+ss, lty = 1)
text(4 * mean(c(7, 16)), 0.365+ss, "London", pos = 3)
# new york
lines(rep(4 * 13, 2), c(0.32, 0.28)+ss, lty = 1)
lines(rep(4 * 21, 2), c(0.32, 0.28)+ss, lty = 1)
lines(4 * c(13, 21), rep(0.30, 2)+ss, lty = 1)
text(4 * mean(c(13, 21)), 0.285+ss, "New York", pos = 3)
# tokyo
lines(rep(4 * 23, 2), c(0.38, 0.34)+ss, lty = 1)
lines(rep(4 * 7, 2), c(0.38, 0.34)+ss, lty = 1)
lines(c(0, 4 * 7), rep(0.36, 2)+ss, lty = 1)
lines(c(4 * 23, 24 * 7), rep(0.36, 2)+ss, lty = 1)
text(4 * mean(c(-1, 7)), 0.345+ss, "Tokyo", pos = 3)
}
pair.names <- c("EURUSD", "USDJPY", "GBPUSD", "USDCHF")
start <- "2013.03.10"
end <- "2013.11.02"
pair.files <- sapply(pair.names, function(x)
paste0("data/",x,"_UTC_15 Mins_",c("Bid","Ask"),"_",start,"_",end,".csv"))
if (!dir.exists("data"))
untar("data.tar.gz")
data <- do.call(cbind, lapply(apply(pair.files, 2, function(x) {
a <- loadData(x[1]) # ask
b <- loadData(x[2]) # bid
p <- get.price(a,b) # (price = log(ask) + log(bid))/2
r <- get.return(a,b) # return(t) = price(t) - price(t-1)
return(list(p = p, r = r - mean(r)))
}), function(x) x$r))
colnames(data) <- pair.names
t <- index(data) # time index
p <- round(60*24/as.numeric(diff(t[1:2]))) # number of observations per day
n <- length(t) # number of observations
# Takes a while, so better to save the models and load them next time...
if (file.exists("data/GAM-PCC-intraday-fx-margins.rda")) {
load("data/GAM-PCC-intraday-fx-margins.rda")
} else {
cl <- makeCluster(detectCores()-1)
registerDoParallel(cl)
fits_margins <- as.list(numeric(dim(data)[2]))
time_margins <- system.time(
fits_margins <- foreach(i = 1:dim(data)[2],
.packages = c("rugarch", "zoo")) %dopar%
{fitIntradayEGARCH(data[,i],p)}
)[3]
closeAllConnections()
save(fits_margins, time_margins,
file = "data/GAM-PCC-intraday-fx-margins.rda")
}
plot_margins(1, data, vol, "2013-03-10","2013-03-17")
plot_margins(2, data, vol, "2013-03-10","2013-03-17")
plot_margins(3, data, vol, "2013-03-10","2013-03-17")
plot_margins(4, data, vol, "2013-03-10","2013-03-17")
vol <- zoo(do.call(cbind,lapply(fits_margins, function(x) x$vol)), order = t)
res <- as.matrix(data)/as.matrix(vol)
U <- data.frame(apply(res, 2, rank)/(n + 1))
colnames(U) <- pair.names
absolute_time <- seq(1,n)*24/p
time_of_day <- get.FFFXmat(data[,1], 5, p)
# Takes a while, so better to save the models and load them next time...
if (file.exists("data/GAM-PCC-intraday-fx-copula.rda")) {
load("data/GAM-PCC-intraday-fx-copula.rda")
} else {
time_copula <- system.time(
fit_copula <- gamVineStructureSelect(U, lin.covs = as.data.frame(time_of_day),
smooth.covs = data.frame(t = absolute_time),
simplified = TRUE, familycrit = "AIC",
method = "FS", n.iters = 40,
verbose = FALSE, parallel = TRUE)
)[3]
save(fit_copula, time_copula,
file = "data/GAM-PCC-intraday-fx-copula.rda")
}
fit_copula
## GAM-Vine matrix:
## [,1] [,2] [,3] [,4]
## [1,] 3 0 0 0
## [2,] 2 1 0 0
## [3,] 4 2 2 0
## [4,] 1 4 4 4
##
## Where
## 1 <-> EURUSD
## 2 <-> USDJPY
## 3 <-> GBPUSD
## 4 <-> USDCHF
##
## Tree 1:
## GBPUSD,EURUSD : t copula with tau(z) = (exp(z)-1)/(exp(z)+1) where
## z ~ X1 + X2 + X3 + X4 + X5 + X6 + X8 + X9 + s(t, k = 40, bs = "cr")
## EURUSD,USDCHF : t copula with tau(z) = (exp(z)-1)/(exp(z)+1) where
## z ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + s(t, k = 320,
## bs = "cr")
## USDJPY,USDCHF : t copula with tau(z) = (exp(z)-1)/(exp(z)+1) where
## z ~ X1 + X2 + X3 + X4 + X6 + X7 + X9 + s(t, k = 80, bs = "cr")
##
## Tree 2:
## GBPUSD,USDCHF|EURUSD : t copula with tau(z) = (exp(z)-1)/(exp(z)+1) where
## z ~ X1 + X3 + X4 + s(t, k = 20, bs = "cr")
## EURUSD,USDJPY|USDCHF : t copula with tau(z) = (exp(z)-1)/(exp(z)+1) where
## z ~ X1 + X2 + X3 + X4 + X9 + s(t, k = 40, bs = "cr")
##
## Tree 3:
## GBPUSD,USDJPY|USDCHF,EURUSD : t copula with tau(z) = (exp(z)-1)/(exp(z)+1) where
## z ~ X1 + X2 + X5 + X6 + s(t, k = 40, bs = "cr")
nBoot <- 2e2 # Number of bootstraped simulations
# Takes a while, so better to save the simulations and load them next time...
if (file.exists("data/GAM-PCC-intraday-fx-bootstrap.rda")) {
load("data/GAM-PCC-intraday-fx-bootstrap.rda")
} else {
newdata <- data.frame(t = absolute_time, X <- time_of_day)
cl <- makeCluster(detectCores()-1)
registerDoParallel(cl)
val.bs <- as.list(numeric(nBoot))
time_bootstrap <- system.time(
val.bs <- foreach(i = 1:nBoot, .packages = c("gamCopula")) %dopar%
{make_sim_model(i, fit_copula, newdata)}
)[3]
save(val.bs, time_bootstrap,
file = "data/GAM-PCC-intraday-fx-bootstrap.rda")
closeAllConnections()
}
# Extract mean and confidence intervals for the smooth and parameters
const <- smooth <- param <- list()
for (k in 1:length(val.bs[[1]])) {
const[[k]] <- apply(sapply(val.bs, function(x) x[[k]][[1]]), 1,
function(y) c(mean(y), quantile(y, c(0.025,0.975))))
smooth[[k]] <- try(apply(sapply(val.bs, function(x) x[[k]][[2]]), 1,
function(y) c(mean(y), quantile(y, c(0.025,0.975)))))
param[[k]] <- try(apply(sapply(val.bs, function(x) x[[k]][[3]]), 1,
function(y) c(mean(y), quantile(y, c(0.025,0.975)))))
}
plot_copula(1, t, fit_copula, smooth, param, FALSE)
plot_copula(2, t, fit_copula, smooth, param, FALSE)
plot_copula(3, t, fit_copula, smooth, param, FALSE)
plot_copula(4, t, fit_copula, smooth, param, FALSE)
plot_copula(5, t, fit_copula, smooth, param, FALSE)
plot_copula(6, t, fit_copula, smooth, param, FALSE)
computing_times <- data.frame(margins = time_margins,
copula = time_copula,
bootstrap = time_bootstrap)/60
knitr::kable(computing_times, digits = 1)
margins | copula | bootstrap | |
---|---|---|---|
elapsed | 0.9 | 4 | 67.2 |
sessionInfo()
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 18.1
##
## Matrix products: default
## BLAS: /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=de_DE.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=de_DE.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] doParallel_1.0.10 iterators_1.0.8 foreach_1.4.3
## [4] gamCopula_0.0-3 rugarch_1.3-6 dplyr_0.7.2
## [7] plyr_1.8.4 pryr_0.1.2 xts_0.10-0
## [10] zoo_1.8-0
##
## loaded via a namespace (and not attached):
## [1] rgl_0.98.1 Rcpp_0.12.12
## [3] ADGofTest_0.3 mvtnorm_1.0-6
## [5] lattice_0.20-35 FNN_1.1
## [7] rprojroot_1.2 assertthat_0.2.0
## [9] digest_0.6.12 mime_0.5
## [11] truncnorm_1.0-7 R6_2.2.2
## [13] backports_1.1.0 stats4_3.4.1
## [15] pcaPP_1.9-72 evaluate_0.10.1
## [17] highr_0.6 rlang_0.1.1.9000
## [19] misc3d_0.8-4 nloptr_1.0.4
## [21] SkewHyperbolic_0.3-2 Matrix_1.2-10
## [23] rmarkdown_1.6 stringr_1.2.0
## [25] htmlwidgets_0.9 igraph_1.1.2
## [27] shiny_1.0.3 compiler_3.4.1
## [29] numDeriv_2016.8-1 httpuv_1.3.5
## [31] pkgconfig_2.0.1 DistributionUtils_0.5-1
## [33] gsl_1.9-10.3 mgcv_1.8-18
## [35] htmltools_0.3.6 Rsolnp_1.16
## [37] multicool_0.1-10 tibble_1.3.3
## [39] VineCopula_2.1.2 expm_0.999-2
## [41] codetools_0.2-15 stabledist_0.7-1
## [43] pspline_1.0-18 GeneralizedHyperbolic_0.8-1
## [45] MASS_7.3-47 grid_3.4.1
## [47] nlme_3.1-131 jsonlite_1.5
## [49] xtable_1.8-2 magrittr_1.5
## [51] KernSmooth_2.23-15 stringi_1.1.5
## [53] bindrcpp_0.2 spd_2.0-1
## [55] tools_3.4.1 copula_0.999-17
## [57] glue_1.1.1 ks_1.10.7
## [59] network_1.13.0 yaml_2.1.14
## [61] knitr_1.16 bindr_0.1