Thomas Nagler 15 May, 2017
Required packages:
library("kdecopula")
library("VineCopula")
library("np")
library("ks")
library("penDvine")
library("GMCM")
library("rafalib")
library("doParallel")
library("dplyr")
library("tidyr")
library("tibble")
library("ggplot2")
make_sigma_mat <- function(v1, v2, rho) {
s12 <- sqrt(v1 * v2) * rho
matrix(c(v1, s12, s12, v2), 2, 2)
}
make_nmix_model <- function(model_name) {
model <- switch(
model_name,
"Mixture I" = list(
m = 2,
d = 2,
pie = c(1/2, 1/2),
mu = list(c(0, -1.5), c(1, 1)),
sigma = list(
make_sigma_mat(0.5, 0.5, 0.3),
make_sigma_mat(0.5, 0.5, 0.8)
)
),
"Mixture II" = list(
m = 3,
d = 2,
pie = c(2/5, 2/5, 1/5),
mu = list(c(0, -1), c(0, 1), c(2, 1)),
sigma = list(
make_sigma_mat(0.5, 0.5, -0.8),
make_sigma_mat(0.5, 0.5, 0.2),
make_sigma_mat(0.5, 0.5, 0.8)
)
),
"Mixture III" = list(
m = 2,
d = 2,
pie = c(1/2, 1/2),
mu = list(c(1, -1), c(-1, 1)),
sigma = list(
make_sigma_mat(0.5, 0.5, 0.6),
make_sigma_mat(0.5, 0.5, 0.6)
)
),
stop("model not implemented")
)
structure(model, class = "cop_nmix")
}
# based on plot.kdecopula
contour_cop_nmix <- function(x, ...) {
size <- 100
xylim <- c(-3, 3)
points <- pnorm(seq(xylim[1], xylim[2], length.out = size))
g <- as.matrix(expand.grid(points, points))
points <- qnorm(g[1:size, 1])
## evaluate on grid
vals <- exp(GMCM:::dgmcm.loglik(x, g, marginal.loglik = TRUE))
cop <- matrix(vals, size, size)
# set default parameters
pars <- list(x = points,
y = points,
z = cop * tcrossprod(dnorm(points)),
levels = c(0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5),
xlim = xylim,
ylim = xylim,
xlab = expression(z[1]),
ylab = expression(z[2]))
# call contour with final parameters
do.call(contour, modifyList(pars, list(...)))
}
sim_from_model <- function(n, model) {
if (class(model) == "cop_nmix") {
return(GMCM::SimulateGMCMData(n, theta = model)$u)
} else {
return(VineCopula::BiCopSim(n, model))
}
}
get_true_c <- function(model) {
g <- as.matrix(expand.grid(u = 1:100/101, v = 1:100/101))
if (class(model) == "cop_nmix") {
return(exp(GMCM:::dgmcm.loglik(model, g, marginal.loglik = TRUE)))
} else {
return(VineCopula::BiCopPDF(g[, 1], g[, 2], model))
}
}
fit_and_eval <- function(u, method) {
## evaluation grids
g <- as.matrix(expand.grid(u = 1:100/101, v = 1:100/101))
g_np <- cbind(u = 1:100/101, v = 1:100/101)
## fit and evaluate methods
if (method %in% c(kdecopula_methods)) {
fit <- kdecop(u,
method = gsub("0", "", method),
renorm.iter = ifelse(grepl("0", method), 0, 3))
c_method <- dkdecop(g, fit)
} else if (method == "np") {
df <- data.frame(u1 = u[, 1], u2 = u[, 2])
tmp <- capture.output({
fit <- npudensbw(~ u1 + u2, data = df)
c_method <- npcopula(fit, data = df, u = g_np)[, 1]
})
} else if (method == "ks") {
fit <- suppressWarnings(kcopula.de(u, Hpi(u)))
c_method <- predict(fit, u = g)
} else if (method == "bs") {
suppressMessages(fit <- paircopula(u, base = "B-spline"))
c_method <- plot(fit, val = g, plot.view = FALSE)$fit
} else if (method == "bern") {
suppressMessages(fit <- paircopula(u, base = "Bernstein"))
c_method <- plot(fit, val = g, plot.view = FALSE)$fit
}
c_method
}
do_one <- function(seed, n, model, method) {
## create model object
true_model <- switch(
model,
"Independence" = VineCopula::BiCop(0, 0),
"Gauss-0.3" = VineCopula::BiCop(1, tau = 0.3),
"Gauss-0.7" = VineCopula::BiCop(1, tau = 0.7),
"Gumbel-0.3" = VineCopula::BiCop(1, tau = 0.3),
"Gumbel-0.7" = VineCopula::BiCop(4, tau = 0.7),
"Mixture I" = make_nmix_model(model),
"Mixture II" = make_nmix_model(model),
"Mixture III" = make_nmix_model(model)
)
## simulate data
set.seed(seed)
u_sim <- sim_from_model(n, true_model)
## fit and evaluate estimator
t_method <- try(
system.time(c_method <- fit_and_eval(u_sim, method))[3]
)
if (inherits(t_method, "try-error")) {
t_method <- c_method <- NA
}
## return results
c_true <- get_true_c(true_model)
data.frame(
n = n,
model = model,
method = method,
seed = seed,
IAE = mean(abs(c_method - c_true)),
time = unname(t_method)
)
}
# register parallel backend
cl <- makeCluster(detectCores(), outfile = "")
registerDoParallel(cl)
kdecopula_methods <- c(
"MR", "beta", "T", "TLL1", "TLL2", "TLL1nn", "TLL2nn", "TTPI", "TTCV",
"MR0", "beta0", "T0", "TLL10", "TLL20", "TLL1nn0", "TLL2nn0", "TTPI0", "TTCV0"
)
other_methods <- c("np", "ks", "bs", "bern")
models <- c("Independence", "Gauss-0.3", "Gauss-0.7", "Gumbel-0.3", "Gumbel-0.7",
"Mixture I", "Mixture II", "Mixture III")
all_results <- NULL
for (n in c(200, 1000)) {
message("* n = ", n, ":")
for (mod in models) {
message(" * model: ", mod)
for (meth in c(kdecopula_methods, other_methods)) {
new_result <- foreach(
seed = 1:100,
.packages = c("VineCopula", "kdecopula", "np", "ks", "penDvine"),
.combine = rbind
) %dopar% {
do_one(seed, n, mod, meth)
}
all_results <- rbind(all_results, new_result)
}
}
}
## * n = 200:
## * model: Independence
## * model: Gauss-0.3
## * model: Gauss-0.7
## * model: Gumbel-0.3
## * model: Gumbel-0.7
## * model: Mixture I
## * model: Mixture II
## * model: Mixture III
## * n = 1000:
## * model: Independence
## * model: Gauss-0.3
## * model: Gauss-0.7
## * model: Gumbel-0.3
## * model: Gumbel-0.7
## * model: Mixture I
## * model: Mixture II
## * model: Mixture III
saveRDS(all_results, "JSS-sim-results.rds")
method_order <- c(
"MR", "beta", "T", "TLL1", "TLL2", "TLL1nn", "TLL2nn", "TTCV", "TTPI",
"np", "ks", "bern", "bs"
)
all_results %>%
filter(!grepl("0", method)) %>%
group_by(model, n, method) %>%
summarize_all(mean) %>%
mutate(
rank = rank(IAE),
method = factor(method, levels = method_order),
MIAE = IAE) %>%
ggplot(aes(method, MIAE, fill = -rank, alpha = as.factor(n))) +
scale_fill_continuous() +
scale_alpha_manual(values = c(0.6, 1)) +
geom_bar(stat = "identity", width = 1.7, position = position_dodge(0)) +
facet_grid(model ~ ., scales = "free_y") +
geom_vline(xintercept = 9.5, linetype = 1, size = 0.2) +
theme_bw() +
theme(legend.position = "none") +
theme(strip.text = element_text(size = 12, angle = 90))
all_results %>%
select(-time) %>%
spread(method, IAE) %>%
mutate(
MR = 100 * (1 - MR / MR0),
beta = 100 * (1 - beta / beta0),
T = 100 * (1 - T / T0),
TLL1 = 100 * (1 - TLL1 / TLL10),
TLL2 = 100 * (1 - TLL2 / TLL20),
TLL1nn = 100 * (1 - TLL1nn / TLL1nn0),
TLL2nn = 100 * (1 - TLL2nn / TLL2nn0),
TTCV = 100 * (1 - TTCV / TTCV0),
TTPI = 100 * (1 - TTPI / TTPI0)
) %>%
select(model, n, seed, MR, beta, T, TLL1, TLL2, TLL1nn, TLL2nn, TTCV, TTPI) %>%
gather(method, improvement, -model, -n, -seed) %>%
mutate(method = factor(method, levels = method_order)) %>%
group_by(method) %>%
summarize(improvement = round(mean(improvement))) %>%
na.omit() %>% # 0-iteration versions result in an NA group
spread(method, improvement) %>%
knitr::kable()
MR | beta | T | TLL1 | TLL2 | TLL1nn | TLL2nn | TTCV | TTPI |
---|---|---|---|---|---|---|---|---|
13 | 15 | 25 | 39 | 21 | 28 | 29 | 24 | 22 |
all_results %>%
select(-IAE) %>%
mutate(method = factor(method, levels = method_order)) %>%
filter(method %in% c("MR", "beta", "T", "TLL2", "TLL2nn", "TTCV", "TTPI",
"np", "ks", "bern", "bs")) %>%
group_by(n, method) %>%
summarize(time = round(mean(time), 2)) %>%
na.omit() %>%
spread(method, time) %>%
knitr::kable()
n | MR | beta | T | TLL2 | TLL2nn | TTCV | TTPI | np | ks | bern | bs |
---|---|---|---|---|---|---|---|---|---|---|---|
200 | 0.11 | 0.26 | 0.07 | 0.31 | 0.62 | 3.76 | 0.35 | 3.03 | 4.61 | 1.88 | 2.10 |
1000 | 0.37 | 0.99 | 0.11 | 0.68 | 1.30 | 58.10 | 13.63 | 37.84 | 41.96 | 3.08 | 3.45 |
sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-pc-linux-gnu/x86_64 (64-bit)
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel splines stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggplot2_2.2.1 tibble_1.3.0 tidyr_0.6.1
## [4] dplyr_0.5.0 doParallel_1.0.10 iterators_1.0.8
## [7] rafalib_1.0.0 GMCM_1.2.4 penDvine_0.2.4
## [10] foreach_1.4.3 fda_2.4.4 Matrix_1.2-6
## [13] TSP_1.1-4 lattice_0.20-35 ks_1.10.6
## [16] mvtnorm_1.0-5 np_0.60-3 VineCopula_2.1.2.9000
## [19] kdecopula_0.9.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.10 ADGofTest_0.3 locfit_1.5-9.1
## [4] FNN_1.1 assertthat_0.2.0 digest_0.6.9
## [7] mime_0.5 R6_2.2.0 plyr_1.8.4
## [10] MatrixModels_0.4-1 stats4_3.3.1 pcaPP_1.9-61
## [13] evaluate_0.10 highr_0.6 qrng_0.0-3
## [16] lazyeval_0.2.0 misc3d_0.8-4 cubature_1.1-2
## [19] SparseM_1.7 labeling_0.3 stringr_1.0.0
## [22] htmlwidgets_0.8 munsell_0.4.3 shiny_0.14.2
## [25] compiler_3.3.1 numDeriv_2014.2-1 httpuv_1.3.3
## [28] gsl_1.9-10.1 htmltools_0.3.5 multicool_0.1-2
## [31] quadprog_1.5-5 codetools_0.2-14 stabledist_0.7-0
## [34] pspline_1.0-17 MASS_7.3-45 grid_3.3.1
## [37] jsonlite_1.1 xtable_1.8-2 gtable_0.2.0
## [40] DBI_0.6-1 magrittr_1.5 scales_0.4.1
## [43] KernSmooth_2.23-15 stringi_1.1.1 reshape2_1.4.1
## [46] latticeExtra_0.6-28 boot_1.3-18 RColorBrewer_1.1-2
## [49] tools_3.3.1 copula_0.999-16 network_1.13.0
## [52] colorspace_1.2-6 knitr_1.15 quantreg_5.33