Skip to content

Instantly share code, notes, and snippets.

@tnagler
Last active September 25, 2019 14:33
Show Gist options
  • Save tnagler/bd194a711026c3d375ab6ae023a5bad5 to your computer and use it in GitHub Desktop.
Save tnagler/bd194a711026c3d375ab6ae023a5bad5 to your computer and use it in GitHub Desktop.
Simulation study for kdecopula vignette

Simulation study for kdecopula vignette

Thomas Nagler 15 May, 2017

Required packages:

library("kdecopula")
library("VineCopula")
library("np")
library("ks")
library("penDvine")
library("GMCM")
library("rafalib")
library("doParallel")
library("dplyr")
library("tidyr")
library("tibble")
library("ggplot2")

Utility functions for Gaussian mixtures

make_sigma_mat <- function(v1, v2, rho) {
    s12 <- sqrt(v1 * v2) * rho
    matrix(c(v1, s12, s12, v2), 2, 2)
}

make_nmix_model <- function(model_name) {
    model <- switch(
        model_name,
        "Mixture I" = list(
            m = 2,
            d = 2,
            pie = c(1/2, 1/2),
            mu = list(c(0, -1.5), c(1, 1)),
            sigma = list(
                make_sigma_mat(0.5, 0.5, 0.3),
                make_sigma_mat(0.5, 0.5, 0.8)
            )
        ),
        "Mixture II" = list(
            m = 3,
            d = 2,
            pie = c(2/5, 2/5, 1/5),
            mu = list(c(0, -1), c(0, 1), c(2, 1)),
            sigma = list(
                make_sigma_mat(0.5, 0.5, -0.8),
                make_sigma_mat(0.5, 0.5, 0.2),
                make_sigma_mat(0.5, 0.5, 0.8)
            )
        ),
        "Mixture III" = list(
            m = 2,
            d = 2,
            pie = c(1/2, 1/2),
            mu = list(c(1, -1), c(-1, 1)),
            sigma = list(
                make_sigma_mat(0.5, 0.5, 0.6),
                make_sigma_mat(0.5, 0.5, 0.6)
            )
        ),
        stop("model not implemented")
    )
    structure(model, class = "cop_nmix")
}
# based on plot.kdecopula
contour_cop_nmix <- function(x, ...) {
    size <- 100
    xylim <- c(-3, 3)
    points <- pnorm(seq(xylim[1], xylim[2], length.out = size))
    g <- as.matrix(expand.grid(points, points))
    points <- qnorm(g[1:size, 1])
    
    ## evaluate on grid
    vals <- exp(GMCM:::dgmcm.loglik(x, g, marginal.loglik = TRUE))
    cop <- matrix(vals, size, size)
    
    # set default parameters
    pars <- list(x = points,
                 y = points,
                 z = cop * tcrossprod(dnorm(points)),
                 levels = c(0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5),
                 xlim = xylim,
                 ylim = xylim,
                 xlab = expression(z[1]),
                 ylab = expression(z[2]))
    
    # call contour with final parameters
    do.call(contour, modifyList(pars, list(...)))
}

Utility functions for simulation study

sim_from_model <- function(n, model) {
    if (class(model) == "cop_nmix") {
        return(GMCM::SimulateGMCMData(n, theta = model)$u)
    } else {
        return(VineCopula::BiCopSim(n, model))
    }
}

get_true_c <- function(model) {
    g <- as.matrix(expand.grid(u = 1:100/101, v = 1:100/101))
    if (class(model) == "cop_nmix") {
        return(exp(GMCM:::dgmcm.loglik(model, g, marginal.loglik = TRUE)))
    } else {
        return(VineCopula::BiCopPDF(g[, 1], g[, 2], model))
    }
}

fit_and_eval <- function(u, method) {
    ## evaluation grids
    g <- as.matrix(expand.grid(u = 1:100/101, v = 1:100/101))
    g_np <- cbind(u = 1:100/101, v = 1:100/101)
    
    ## fit and evaluate methods
    if (method %in% c(kdecopula_methods)) {
        fit <- kdecop(u,
                      method = gsub("0", "", method),
                      renorm.iter = ifelse(grepl("0", method), 0, 3))
        c_method <- dkdecop(g, fit)
    } else if (method == "np") {
        df <- data.frame(u1 = u[, 1], u2 = u[, 2])
        tmp <- capture.output({
            fit <- npudensbw(~ u1 + u2, data = df)
            c_method <- npcopula(fit, data = df, u = g_np)[, 1]
            
        })
    } else if (method == "ks") {
        fit <- suppressWarnings(kcopula.de(u, Hpi(u)))
        c_method <- predict(fit, u = g)
    } else if (method == "bs") {
        suppressMessages(fit <- paircopula(u, base = "B-spline"))
        c_method <- plot(fit, val = g, plot.view = FALSE)$fit
    } else if (method == "bern") {
        suppressMessages(fit <- paircopula(u, base = "Bernstein"))
        c_method <- plot(fit, val = g, plot.view = FALSE)$fit
    }
    
    c_method
}

do_one <- function(seed, n, model, method) {
    ## create model object
    true_model <- switch(
        model,
        "Independence" = VineCopula::BiCop(0, 0),
        "Gauss-0.3"    = VineCopula::BiCop(1, tau = 0.3),
        "Gauss-0.7"    = VineCopula::BiCop(1, tau = 0.7),
        "Gumbel-0.3"   = VineCopula::BiCop(1, tau = 0.3),
        "Gumbel-0.7"   = VineCopula::BiCop(4, tau = 0.7),
        "Mixture I"    = make_nmix_model(model),
        "Mixture II"   = make_nmix_model(model),
        "Mixture III"  = make_nmix_model(model)
    )
    
    ## simulate data
    set.seed(seed)
    u_sim <- sim_from_model(n, true_model)
    
    ## fit and evaluate estimator
    t_method <- try(
        system.time(c_method <- fit_and_eval(u_sim, method))[3]
    )
    if (inherits(t_method, "try-error")) {
        t_method <- c_method <- NA
    }
    
    ## return results
    c_true <- get_true_c(true_model)
    data.frame(
        n = n,
        model = model,
        method = method,
        seed = seed,
        IAE = mean(abs(c_method - c_true)),
        time = unname(t_method)
    )
}

Run simulation study

# register parallel backend
cl <- makeCluster(detectCores(), outfile = "")
registerDoParallel(cl)
kdecopula_methods <- c(
    "MR", "beta", "T", "TLL1", "TLL2", "TLL1nn", "TLL2nn", "TTPI", "TTCV",
    "MR0", "beta0", "T0", "TLL10", "TLL20", "TLL1nn0", "TLL2nn0", "TTPI0", "TTCV0"
)
other_methods <- c("np", "ks", "bs", "bern")
models <- c("Independence", "Gauss-0.3", "Gauss-0.7", "Gumbel-0.3", "Gumbel-0.7",
            "Mixture I", "Mixture II", "Mixture III")

all_results <- NULL
for (n in c(200, 1000)) {
    message("* n = ", n, ":")
    for (mod in models) {
        message("    * model: ", mod)
        for (meth in c(kdecopula_methods, other_methods)) {
            new_result <- foreach(
                seed = 1:100,
                .packages = c("VineCopula", "kdecopula", "np", "ks", "penDvine"),
                .combine = rbind
            ) %dopar% {
                do_one(seed, n, mod, meth)
            }
            all_results <- rbind(all_results, new_result)
        }
    }
}
## * n = 200:

##     * model: Independence

##     * model: Gauss-0.3

##     * model: Gauss-0.7

##     * model: Gumbel-0.3

##     * model: Gumbel-0.7

##     * model: Mixture I

##     * model: Mixture II

##     * model: Mixture III

## * n = 1000:

##     * model: Independence

##     * model: Gauss-0.3

##     * model: Gauss-0.7

##     * model: Gumbel-0.3

##     * model: Gumbel-0.7

##     * model: Mixture I

##     * model: Mixture II

##     * model: Mixture III
saveRDS(all_results, "JSS-sim-results.rds")

Analysis

General performance in terms of MIAE

method_order <- c(
    "MR", "beta", "T", "TLL1", "TLL2", "TLL1nn", "TLL2nn", "TTCV", "TTPI",
    "np", "ks", "bern", "bs"
)
all_results %>%
    filter(!grepl("0", method)) %>%
    group_by(model, n, method) %>%
    summarize_all(mean) %>%
    mutate(
        rank = rank(IAE), 
        method = factor(method, levels = method_order),
        MIAE = IAE) %>%
    ggplot(aes(method, MIAE, fill = -rank, alpha = as.factor(n))) +
    scale_fill_continuous() +
    scale_alpha_manual(values = c(0.6, 1)) +
    geom_bar(stat = "identity", width = 1.7, position = position_dodge(0)) +
    facet_grid(model ~ ., scales = "free_y") +
    geom_vline(xintercept = 9.5, linetype = 1, size = 0.2) +
    theme_bw() +
    theme(legend.position = "none") +
    theme(strip.text = element_text(size = 12, angle = 90))

JSS-sim-MIAE

Improvement through renormalization

all_results %>%
    select(-time) %>%
    spread(method, IAE) %>%
    mutate(
        MR     = 100 * (1 - MR / MR0),
        beta   = 100 * (1 - beta / beta0),
        T      = 100 * (1 - T / T0),
        TLL1   = 100 * (1 - TLL1 / TLL10),
        TLL2   = 100 * (1 - TLL2 / TLL20),
        TLL1nn = 100 * (1 - TLL1nn / TLL1nn0),
        TLL2nn = 100 * (1 - TLL2nn / TLL2nn0),
        TTCV   = 100 * (1 - TTCV / TTCV0),
        TTPI   = 100 * (1 - TTPI / TTPI0)
    ) %>%
    select(model, n, seed, MR, beta, T, TLL1, TLL2, TLL1nn, TLL2nn, TTCV, TTPI) %>%
    gather(method, improvement, -model, -n, -seed) %>%
    mutate(method = factor(method, levels = method_order)) %>%
    group_by(method) %>%
    summarize(improvement = round(mean(improvement))) %>%
    na.omit() %>%  # 0-iteration versions result in an NA group
    spread(method, improvement) %>%
    knitr::kable()
MR beta T TLL1 TLL2 TLL1nn TLL2nn TTCV TTPI
13 15 25 39 21 28 29 24 22

Computation time

all_results %>%
    select(-IAE) %>%
    mutate(method = factor(method, levels = method_order)) %>%
    filter(method %in% c("MR", "beta", "T", "TLL2", "TLL2nn", "TTCV", "TTPI",
                         "np", "ks", "bern", "bs")) %>%
    group_by(n, method) %>%
    summarize(time = round(mean(time), 2)) %>%
    na.omit() %>%
    spread(method, time) %>%
    knitr::kable()
n MR beta T TLL2 TLL2nn TTCV TTPI np ks bern bs
200 0.11 0.26 0.07 0.31 0.62 3.76 0.35 3.03 4.61 1.88 2.10
1000 0.37 0.99 0.11 0.68 1.30 58.10 13.63 37.84 41.96 3.08 3.45

Environment info

sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-pc-linux-gnu/x86_64 (64-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  splines   stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggplot2_2.2.1         tibble_1.3.0          tidyr_0.6.1          
##  [4] dplyr_0.5.0           doParallel_1.0.10     iterators_1.0.8      
##  [7] rafalib_1.0.0         GMCM_1.2.4            penDvine_0.2.4       
## [10] foreach_1.4.3         fda_2.4.4             Matrix_1.2-6         
## [13] TSP_1.1-4             lattice_0.20-35       ks_1.10.6            
## [16] mvtnorm_1.0-5         np_0.60-3             VineCopula_2.1.2.9000
## [19] kdecopula_0.9.0      
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.10        ADGofTest_0.3       locfit_1.5-9.1     
##  [4] FNN_1.1             assertthat_0.2.0    digest_0.6.9       
##  [7] mime_0.5            R6_2.2.0            plyr_1.8.4         
## [10] MatrixModels_0.4-1  stats4_3.3.1        pcaPP_1.9-61       
## [13] evaluate_0.10       highr_0.6           qrng_0.0-3         
## [16] lazyeval_0.2.0      misc3d_0.8-4        cubature_1.1-2     
## [19] SparseM_1.7         labeling_0.3        stringr_1.0.0      
## [22] htmlwidgets_0.8     munsell_0.4.3       shiny_0.14.2       
## [25] compiler_3.3.1      numDeriv_2014.2-1   httpuv_1.3.3       
## [28] gsl_1.9-10.1        htmltools_0.3.5     multicool_0.1-2    
## [31] quadprog_1.5-5      codetools_0.2-14    stabledist_0.7-0   
## [34] pspline_1.0-17      MASS_7.3-45         grid_3.3.1         
## [37] jsonlite_1.1        xtable_1.8-2        gtable_0.2.0       
## [40] DBI_0.6-1           magrittr_1.5        scales_0.4.1       
## [43] KernSmooth_2.23-15  stringi_1.1.1       reshape2_1.4.1     
## [46] latticeExtra_0.6-28 boot_1.3-18         RColorBrewer_1.1-2 
## [49] tools_3.3.1         copula_0.999-16     network_1.13.0     
## [52] colorspace_1.2-6    knitr_1.15          quantreg_5.33
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment