Skip to content

Instantly share code, notes, and snippets.

@tnagler
Last active September 25, 2019 14:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tnagler/786465cee2c774a844ff1846e7cdacd8 to your computer and use it in GitHub Desktop.
Save tnagler/786465cee2c774a844ff1846e7cdacd8 to your computer and use it in GitHub Desktop.
Simulation study for "Asymptotic analysis of the jittering estimator"

Simulation study for "Asymptotic analysis of the continuous convolution estimator"

Thomas Nagler 15 May, 2017

Required packages:

library(cctools)
library(np)
library(doParallel)
library(dplyr)
library(tidyr)
library(ggplot2)
if (!require(naglr)) 
    devtools::install_github("tnagler/naglr")
library(naglr)

Code for simulation and estimation

do_one <- function(seed, n, m, p, q) {
    set.seed(seed)
    ## model setup --------------------------------
    # grid
    gridp <- lapply(seq_len(p), function(j) 0:m)
    gridq <- lapply(seq_len(q), function(j) seq(-2, 2, by = 0.4))
    grid <- as.matrix(do.call(expand.grid, c(gridp, gridq)))
    colnames(grid) <- paste0("X", 1:ncol(grid))
    grid_ord <- as.data.frame(grid)
    for (j in seq_len(p))
        grid_ord[, j] <- ordered(grid_ord[, j], 0:m)
    
    # simulate data
    u <- matrix(runif(n * (p + q)), n, p + q)
    datp <- lapply(seq_len(p), function(j) qbinom(u[, j], size = m, prob = 0.3))
    datq <- lapply(seq_len(q), function(j) qnorm(u[, p + j]))
    
    dat <- c(datp, datq)
    dat <- as.data.frame(dat)
    colnames(dat) <- paste0("X", 1:ncol(dat))
    dat_ord <- dat
    for (j in seq_len(p)) 
        dat_ord[, j] <- ordered(dat_ord[, j], 0:m)
    
    # true density values
    margsp <- lapply(seq_len(p), function(j) dbinom(grid[, j], size = m, prob = 0.3))
    margsq <- lapply(seq_len(q), function(j) dnorm(grid[, p + j]))
    margs <- do.call(cbind, c(margsp, margsq))
    val_true <- apply(margs, 1, prod)
    
    
    ## estimators ------------------------------------
    # Li Racine
    options("np.messages" = FALSE)
    options(np.tree = FALSE)
    bws <-  npudensbw(dat_ord, nmulti = 1, ckertype = "epanechnikov")
    val_np <- fitted(npudens(bws = bws, edat = grid_ord))
    
    # jittering kernel density
    fit <- cckde(dat_ord)
    val_cckde <- dcckde(grid_ord, fit)
    
    # jittering kernel density
    fit2 <- cckde(dat_ord, theta = 0.25)
    val_cckde2 <- dcckde(grid_ord, fit2)
    
    
    ## error measures ----------------------------------
    mse_np     <- mean(abs((val_np - val_true))^2)
    mse_cckde <- mean(abs((val_cckde - val_true))^2)
    mse_cckde2 <- mean(abs((val_cckde2 - val_true))^2)
    
    
    ## results ----------------------------------------
    data.frame(
        MISE = c(mse_np, mse_cckde, mse_cckde2),
        estimator = c("liracine", "jkde", "jkde2"),
        p = p,
        q = q,
        m = m,
        n = n,
        seed = seed
    )
}

Run the study

cl <- makeCluster(detectCores(), outfile = "")
registerDoParallel(cl)

all_results <- NULL
scenarios <- list(c(1, 0), c(5, 0), c(3, 2))
for (s in seq_along(scenarios)) {
    pp <- scenarios[[s]][1]
    qq <- scenarios[[s]][2]
    for (mm in c(1, 15)) {
        for (nn in c(50, 200)) {
            message("p = ", pp, ", q = ", qq, ", m = ", mm, ", n = ", nn)
            new_result <- foreach(
                seed = 1:1000,
                .packages = c("np", "cctools", "copula"),
                .combine = rbind) %dopar% {
                    tryCatch(
                        do_one(seed, n = nn, m = mm, p = pp, q = qq),
                        error = function(e) rep(NA, 7)
                    )
                }
            all_results <- rbind(all_results, new_result)
        }
    }
}
## p = 1, q = 0, m = 1, n = 50

## p = 1, q = 0, m = 1, n = 200

## p = 1, q = 0, m = 15, n = 50

## p = 1, q = 0, m = 15, n = 200

## p = 5, q = 0, m = 1, n = 50

## p = 5, q = 0, m = 1, n = 200

## p = 5, q = 0, m = 15, n = 50

## p = 5, q = 0, m = 15, n = 200

## p = 3, q = 2, m = 1, n = 50

## p = 3, q = 2, m = 1, n = 200

## p = 3, q = 2, m = 15, n = 50

## p = 3, q = 2, m = 15, n = 200
stopCluster(cl)
saveRDS(all_results, file = "jkde-sim-results.rds")

Analysis

all_results <- readRDS("jkde-sim-results.rds")
all_results %>%
    mutate(mpq = paste0("italic(m == ", m,
                        "* ~~~~~ p ==", p,
                        "* ~~~~~ q == ", q, ")"),
           m = paste0("italic(m == ", m, ")"),
           RASE = sqrt(MISE)) %>%
    transform(mpq = factor(mpq, levels = levels(as.factor(mpq))[c(4, 6, 5, 1, 3, 2)])) %>%
    ggplot(aes(x = estimator, y = RASE, fill = estimator, alpha = factor(n))) +
    scale_fill_manual(values = naglr_pal()(3)) +
    scale_alpha_manual(values = c(1, 1)) +
    geom_boxplot(show.legend = FALSE, outlier.size = 0, width = 1 / 1.61) +
    facet_wrap(~ mpq, scales = "free", label = label_parsed) +
    ylab(expression(plain(RASE))) +
    theme_bw() +
    theme(aspect.ratio = 1) +
    expand_limits(y = 0) +
    theme_naglr(
        base_size = 15,
        axis_title_size = 17,
        plot_margin = grid::unit(c(0,0,0,0), "mm"),
        strip_text_size = 15
    )

plot of chunk cckde-sim-results

Environment information

sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-pc-linux-gnu/x86_64 (64-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] naglr_0.1.0       ggplot2_2.2.1     tidyr_0.6.1       dplyr_0.5.0      
## [5] doParallel_1.0.10 iterators_1.0.8   foreach_1.4.3     np_0.60-3        
## [9] cctools_0.1.0    
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.10       highr_0.6          compiler_3.3.1    
##  [4] plyr_1.8.4         tools_3.3.1        boot_1.3-18       
##  [7] digest_0.6.9       evaluate_0.10      tibble_1.3.0      
## [10] gtable_0.2.0       lattice_0.20-35    Matrix_1.2-6      
## [13] DBI_0.6-1          SparseM_1.7        stringr_1.0.0     
## [16] knitr_1.15         MatrixModels_0.4-1 grid_3.3.1        
## [19] R6_2.2.0           qrng_0.0-3         magrittr_1.5      
## [22] scales_0.4.1       codetools_0.2-14   assertthat_0.2.0  
## [25] cubature_1.1-2     colorspace_1.2-6   labeling_0.3      
## [28] quantreg_5.33      stringi_1.1.1      lazyeval_0.2.0    
## [31] munsell_0.4.3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment