tnagler/JSS-sim.md

## JSS-sim.md

      
    Raw
  

              JSS-sim.md
            
          
    Simulation study for kdecopula vignette

Thomas Nagler
15 May, 2017
Required packages:
library("kdecopula")
library("VineCopula")
library("np")
library("ks")
library("penDvine")
library("GMCM")
library("rafalib")
library("doParallel")
library("dplyr")
library("tidyr")
library("tibble")
library("ggplot2")
Utility functions for Gaussian mixtures

make_sigma_mat <- function(v1, v2, rho) {
    s12 <- sqrt(v1 * v2) * rho
    matrix(c(v1, s12, s12, v2), 2, 2)
}

make_nmix_model <- function(model_name) {
    model <- switch(
        model_name,
        "Mixture I" = list(
            m = 2,
            d = 2,
            pie = c(1/2, 1/2),
            mu = list(c(0, -1.5), c(1, 1)),
            sigma = list(
                make_sigma_mat(0.5, 0.5, 0.3),
                make_sigma_mat(0.5, 0.5, 0.8)
            )
        ),
        "Mixture II" = list(
            m = 3,
            d = 2,
            pie = c(2/5, 2/5, 1/5),
            mu = list(c(0, -1), c(0, 1), c(2, 1)),
            sigma = list(
                make_sigma_mat(0.5, 0.5, -0.8),
                make_sigma_mat(0.5, 0.5, 0.2),
                make_sigma_mat(0.5, 0.5, 0.8)
            )
        ),
        "Mixture III" = list(
            m = 2,
            d = 2,
            pie = c(1/2, 1/2),
            mu = list(c(1, -1), c(-1, 1)),
            sigma = list(
                make_sigma_mat(0.5, 0.5, 0.6),
                make_sigma_mat(0.5, 0.5, 0.6)
            )
        ),
        stop("model not implemented")
    )
    structure(model, class = "cop_nmix")
}
# based on plot.kdecopula
contour_cop_nmix <- function(x, ...) {
    size <- 100
    xylim <- c(-3, 3)
    points <- pnorm(seq(xylim[1], xylim[2], length.out = size))
    g <- as.matrix(expand.grid(points, points))
    points <- qnorm(g[1:size, 1])
    
    ## evaluate on grid
    vals <- exp(GMCM:::dgmcm.loglik(x, g, marginal.loglik = TRUE))
    cop <- matrix(vals, size, size)
    
    # set default parameters
    pars <- list(x = points,
                 y = points,
                 z = cop * tcrossprod(dnorm(points)),
                 levels = c(0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5),
                 xlim = xylim,
                 ylim = xylim,
                 xlab = expression(z[1]),
                 ylab = expression(z[2]))
    
    # call contour with final parameters
    do.call(contour, modifyList(pars, list(...)))
}
Utility functions for simulation study

sim_from_model <- function(n, model) {
    if (class(model) == "cop_nmix") {
        return(GMCM::SimulateGMCMData(n, theta = model)$u)
    } else {
        return(VineCopula::BiCopSim(n, model))
    }
}

get_true_c <- function(model) {
    g <- as.matrix(expand.grid(u = 1:100/101, v = 1:100/101))
    if (class(model) == "cop_nmix") {
        return(exp(GMCM:::dgmcm.loglik(model, g, marginal.loglik = TRUE)))
    } else {
        return(VineCopula::BiCopPDF(g[, 1], g[, 2], model))
    }
}

fit_and_eval <- function(u, method) {
    ## evaluation grids
    g <- as.matrix(expand.grid(u = 1:100/101, v = 1:100/101))
    g_np <- cbind(u = 1:100/101, v = 1:100/101)
    
    ## fit and evaluate methods
    if (method %in% c(kdecopula_methods)) {
        fit <- kdecop(u,
                      method = gsub("0", "", method),
                      renorm.iter = ifelse(grepl("0", method), 0, 3))
        c_method <- dkdecop(g, fit)
    } else if (method == "np") {
        df <- data.frame(u1 = u[, 1], u2 = u[, 2])
        tmp <- capture.output({
            fit <- npudensbw(~ u1 + u2, data = df)
            c_method <- npcopula(fit, data = df, u = g_np)[, 1]
            
        })
    } else if (method == "ks") {
        fit <- suppressWarnings(kcopula.de(u, Hpi(u)))
        c_method <- predict(fit, u = g)
    } else if (method == "bs") {
        suppressMessages(fit <- paircopula(u, base = "B-spline"))
        c_method <- plot(fit, val = g, plot.view = FALSE)$fit
    } else if (method == "bern") {
        suppressMessages(fit <- paircopula(u, base = "Bernstein"))
        c_method <- plot(fit, val = g, plot.view = FALSE)$fit
    }
    
    c_method
}

do_one <- function(seed, n, model, method) {
    ## create model object
    true_model <- switch(
        model,
        "Independence" = VineCopula::BiCop(0, 0),
        "Gauss-0.3"    = VineCopula::BiCop(1, tau = 0.3),
        "Gauss-0.7"    = VineCopula::BiCop(1, tau = 0.7),
        "Gumbel-0.3"   = VineCopula::BiCop(1, tau = 0.3),
        "Gumbel-0.7"   = VineCopula::BiCop(4, tau = 0.7),
        "Mixture I"    = make_nmix_model(model),
        "Mixture II"   = make_nmix_model(model),
        "Mixture III"  = make_nmix_model(model)
    )
    
    ## simulate data
    set.seed(seed)
    u_sim <- sim_from_model(n, true_model)
    
    ## fit and evaluate estimator
    t_method <- try(
        system.time(c_method <- fit_and_eval(u_sim, method))[3]
    )
    if (inherits(t_method, "try-error")) {
        t_method <- c_method <- NA
    }
    
    ## return results
    c_true <- get_true_c(true_model)
    data.frame(
        n = n,
        model = model,
        method = method,
        seed = seed,
        IAE = mean(abs(c_method - c_true)),
        time = unname(t_method)
    )
}
Run simulation study

# register parallel backend
cl <- makeCluster(detectCores(), outfile = "")
registerDoParallel(cl)
kdecopula_methods <- c(
    "MR", "beta", "T", "TLL1", "TLL2", "TLL1nn", "TLL2nn", "TTPI", "TTCV",
    "MR0", "beta0", "T0", "TLL10", "TLL20", "TLL1nn0", "TLL2nn0", "TTPI0", "TTCV0"
)
other_methods <- c("np", "ks", "bs", "bern")
models <- c("Independence", "Gauss-0.3", "Gauss-0.7", "Gumbel-0.3", "Gumbel-0.7",
            "Mixture I", "Mixture II", "Mixture III")

all_results <- NULL
for (n in c(200, 1000)) {
    message("* n = ", n, ":")
    for (mod in models) {
        message("    * model: ", mod)
        for (meth in c(kdecopula_methods, other_methods)) {
            new_result <- foreach(
                seed = 1:100,
                .packages = c("VineCopula", "kdecopula", "np", "ks", "penDvine"),
                .combine = rbind
            ) %dopar% {
                do_one(seed, n, mod, meth)
            }
            all_results <- rbind(all_results, new_result)
        }
    }
}
## * n = 200:

##     * model: Independence

##     * model: Gauss-0.3

##     * model: Gauss-0.7

##     * model: Gumbel-0.3

##     * model: Gumbel-0.7

##     * model: Mixture I

##     * model: Mixture II

##     * model: Mixture III

## * n = 1000:

##     * model: Independence

##     * model: Gauss-0.3

##     * model: Gauss-0.7

##     * model: Gumbel-0.3

##     * model: Gumbel-0.7

##     * model: Mixture I

##     * model: Mixture II

##     * model: Mixture III

saveRDS(all_results, "JSS-sim-results.rds")
Analysis

General performance in terms of MIAE

method_order <- c(
    "MR", "beta", "T", "TLL1", "TLL2", "TLL1nn", "TLL2nn", "TTCV", "TTPI",
    "np", "ks", "bern", "bs"
)
all_results %>%
    filter(!grepl("0", method)) %>%
    group_by(model, n, method) %>%
    summarize_all(mean) %>%
    mutate(
        rank = rank(IAE), 
        method = factor(method, levels = method_order),
        MIAE = IAE) %>%
    ggplot(aes(method, MIAE, fill = -rank, alpha = as.factor(n))) +
    scale_fill_continuous() +
    scale_alpha_manual(values = c(0.6, 1)) +
    geom_bar(stat = "identity", width = 1.7, position = position_dodge(0)) +
    facet_grid(model ~ ., scales = "free_y") +
    geom_vline(xintercept = 9.5, linetype = 1, size = 0.2) +
    theme_bw() +
    theme(legend.position = "none") +
    theme(strip.text = element_text(size = 12, angle = 90))

Improvement through renormalization

all_results %>%
    select(-time) %>%
    spread(method, IAE) %>%
    mutate(
        MR     = 100 * (1 - MR / MR0),
        beta   = 100 * (1 - beta / beta0),
        T      = 100 * (1 - T / T0),
        TLL1   = 100 * (1 - TLL1 / TLL10),
        TLL2   = 100 * (1 - TLL2 / TLL20),
        TLL1nn = 100 * (1 - TLL1nn / TLL1nn0),
        TLL2nn = 100 * (1 - TLL2nn / TLL2nn0),
        TTCV   = 100 * (1 - TTCV / TTCV0),
        TTPI   = 100 * (1 - TTPI / TTPI0)
    ) %>%
    select(model, n, seed, MR, beta, T, TLL1, TLL2, TLL1nn, TLL2nn, TTCV, TTPI) %>%
    gather(method, improvement, -model, -n, -seed) %>%
    mutate(method = factor(method, levels = method_order)) %>%
    group_by(method) %>%
    summarize(improvement = round(mean(improvement))) %>%
    na.omit() %>%  # 0-iteration versions result in an NA group
    spread(method, improvement) %>%
    knitr::kable()


MR
beta
T
TLL1
TLL2
TLL1nn
TLL2nn
TTCV
TTPI


13
15
25
39
21
28
29
24
22


Computation time

all_results %>%
    select(-IAE) %>%
    mutate(method = factor(method, levels = method_order)) %>%
    filter(method %in% c("MR", "beta", "T", "TLL2", "TLL2nn", "TTCV", "TTPI",
                         "np", "ks", "bern", "bs")) %>%
    group_by(n, method) %>%
    summarize(time = round(mean(time), 2)) %>%
    na.omit() %>%
    spread(method, time) %>%
    knitr::kable()


n
MR
beta
T
TLL2
TLL2nn
TTCV
TTPI
np
ks
bern
bs


200
0.11
0.26
0.07
0.31
0.62
3.76
0.35
3.03
4.61
1.88
2.10


1000
0.37
0.99
0.11
0.68
1.30
58.10
13.63
37.84
41.96
3.08
3.45


Environment info

sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-pc-linux-gnu/x86_64 (64-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  splines   stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggplot2_2.2.1         tibble_1.3.0          tidyr_0.6.1          
##  [4] dplyr_0.5.0           doParallel_1.0.10     iterators_1.0.8      
##  [7] rafalib_1.0.0         GMCM_1.2.4            penDvine_0.2.4       
## [10] foreach_1.4.3         fda_2.4.4             Matrix_1.2-6         
## [13] TSP_1.1-4             lattice_0.20-35       ks_1.10.6            
## [16] mvtnorm_1.0-5         np_0.60-3             VineCopula_2.1.2.9000
## [19] kdecopula_0.9.0      
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.10        ADGofTest_0.3       locfit_1.5-9.1     
##  [4] FNN_1.1             assertthat_0.2.0    digest_0.6.9       
##  [7] mime_0.5            R6_2.2.0            plyr_1.8.4         
## [10] MatrixModels_0.4-1  stats4_3.3.1        pcaPP_1.9-61       
## [13] evaluate_0.10       highr_0.6           qrng_0.0-3         
## [16] lazyeval_0.2.0      misc3d_0.8-4        cubature_1.1-2     
## [19] SparseM_1.7         labeling_0.3        stringr_1.0.0      
## [22] htmlwidgets_0.8     munsell_0.4.3       shiny_0.14.2       
## [25] compiler_3.3.1      numDeriv_2014.2-1   httpuv_1.3.3       
## [28] gsl_1.9-10.1        htmltools_0.3.5     multicool_0.1-2    
## [31] quadprog_1.5-5      codetools_0.2-14    stabledist_0.7-0   
## [34] pspline_1.0-17      MASS_7.3-45         grid_3.3.1         
## [37] jsonlite_1.1        xtable_1.8-2        gtable_0.2.0       
## [40] DBI_0.6-1           magrittr_1.5        scales_0.4.1       
## [43] KernSmooth_2.23-15  stringi_1.1.1       reshape2_1.4.1     
## [46] latticeExtra_0.6-28 boot_1.3-18         RColorBrewer_1.1-2 
## [49] tools_3.3.1         copula_0.999-16     network_1.13.0     
## [52] colorspace_1.2-6    knitr_1.15          quantreg_5.33


## MIAE.png

      
    Raw
  

              MIAE.png
n	MR	beta	T	TLL2	TLL2nn	TTCV	TTPI	np	ks	bern	bs
200	0.11	0.26	0.07	0.31	0.62	3.76	0.35	3.03	4.61	1.88	2.10
1000	0.37	0.99	0.11	0.68	1.30	58.10	13.63	37.84	41.96	3.08	3.45