Thomas Nagler 15 May, 2017
Required packages:
library(cctools)
library(np)
library(doParallel)
library(dplyr)
library(tidyr)
library(ggplot2)
if (!require(naglr))
devtools::install_github("tnagler/naglr")
library(naglr)
do_one <- function(seed, n, m, p, q) {
set.seed(seed)
## model setup --------------------------------
# grid
gridp <- lapply(seq_len(p), function(j) 0:m)
gridq <- lapply(seq_len(q), function(j) seq(-2, 2, by = 0.4))
grid <- as.matrix(do.call(expand.grid, c(gridp, gridq)))
colnames(grid) <- paste0("X", 1:ncol(grid))
grid_ord <- as.data.frame(grid)
for (j in seq_len(p))
grid_ord[, j] <- ordered(grid_ord[, j], 0:m)
# simulate data
u <- matrix(runif(n * (p + q)), n, p + q)
datp <- lapply(seq_len(p), function(j) qbinom(u[, j], size = m, prob = 0.3))
datq <- lapply(seq_len(q), function(j) qnorm(u[, p + j]))
dat <- c(datp, datq)
dat <- as.data.frame(dat)
colnames(dat) <- paste0("X", 1:ncol(dat))
dat_ord <- dat
for (j in seq_len(p))
dat_ord[, j] <- ordered(dat_ord[, j], 0:m)
# true density values
margsp <- lapply(seq_len(p), function(j) dbinom(grid[, j], size = m, prob = 0.3))
margsq <- lapply(seq_len(q), function(j) dnorm(grid[, p + j]))
margs <- do.call(cbind, c(margsp, margsq))
val_true <- apply(margs, 1, prod)
## estimators ------------------------------------
# Li Racine
options("np.messages" = FALSE)
options(np.tree = FALSE)
bws <- npudensbw(dat_ord, nmulti = 1, ckertype = "epanechnikov")
val_np <- fitted(npudens(bws = bws, edat = grid_ord))
# jittering kernel density
fit <- cckde(dat_ord)
val_cckde <- dcckde(grid_ord, fit)
# jittering kernel density
fit2 <- cckde(dat_ord, theta = 0.25)
val_cckde2 <- dcckde(grid_ord, fit2)
## error measures ----------------------------------
mse_np <- mean(abs((val_np - val_true))^2)
mse_cckde <- mean(abs((val_cckde - val_true))^2)
mse_cckde2 <- mean(abs((val_cckde2 - val_true))^2)
## results ----------------------------------------
data.frame(
MISE = c(mse_np, mse_cckde, mse_cckde2),
estimator = c("liracine", "jkde", "jkde2"),
p = p,
q = q,
m = m,
n = n,
seed = seed
)
}
cl <- makeCluster(detectCores(), outfile = "")
registerDoParallel(cl)
all_results <- NULL
scenarios <- list(c(1, 0), c(5, 0), c(3, 2))
for (s in seq_along(scenarios)) {
pp <- scenarios[[s]][1]
qq <- scenarios[[s]][2]
for (mm in c(1, 15)) {
for (nn in c(50, 200)) {
message("p = ", pp, ", q = ", qq, ", m = ", mm, ", n = ", nn)
new_result <- foreach(
seed = 1:1000,
.packages = c("np", "cctools", "copula"),
.combine = rbind) %dopar% {
tryCatch(
do_one(seed, n = nn, m = mm, p = pp, q = qq),
error = function(e) rep(NA, 7)
)
}
all_results <- rbind(all_results, new_result)
}
}
}
## p = 1, q = 0, m = 1, n = 50
## p = 1, q = 0, m = 1, n = 200
## p = 1, q = 0, m = 15, n = 50
## p = 1, q = 0, m = 15, n = 200
## p = 5, q = 0, m = 1, n = 50
## p = 5, q = 0, m = 1, n = 200
## p = 5, q = 0, m = 15, n = 50
## p = 5, q = 0, m = 15, n = 200
## p = 3, q = 2, m = 1, n = 50
## p = 3, q = 2, m = 1, n = 200
## p = 3, q = 2, m = 15, n = 50
## p = 3, q = 2, m = 15, n = 200
stopCluster(cl)
saveRDS(all_results, file = "jkde-sim-results.rds")
all_results <- readRDS("jkde-sim-results.rds")
all_results %>%
mutate(mpq = paste0("italic(m == ", m,
"* ~~~~~ p ==", p,
"* ~~~~~ q == ", q, ")"),
m = paste0("italic(m == ", m, ")"),
RASE = sqrt(MISE)) %>%
transform(mpq = factor(mpq, levels = levels(as.factor(mpq))[c(4, 6, 5, 1, 3, 2)])) %>%
ggplot(aes(x = estimator, y = RASE, fill = estimator, alpha = factor(n))) +
scale_fill_manual(values = naglr_pal()(3)) +
scale_alpha_manual(values = c(1, 1)) +
geom_boxplot(show.legend = FALSE, outlier.size = 0, width = 1 / 1.61) +
facet_wrap(~ mpq, scales = "free", label = label_parsed) +
ylab(expression(plain(RASE))) +
theme_bw() +
theme(aspect.ratio = 1) +
expand_limits(y = 0) +
theme_naglr(
base_size = 15,
axis_title_size = 17,
plot_margin = grid::unit(c(0,0,0,0), "mm"),
strip_text_size = 15
)
sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-pc-linux-gnu/x86_64 (64-bit)
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] naglr_0.1.0 ggplot2_2.2.1 tidyr_0.6.1 dplyr_0.5.0
## [5] doParallel_1.0.10 iterators_1.0.8 foreach_1.4.3 np_0.60-3
## [9] cctools_0.1.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.10 highr_0.6 compiler_3.3.1
## [4] plyr_1.8.4 tools_3.3.1 boot_1.3-18
## [7] digest_0.6.9 evaluate_0.10 tibble_1.3.0
## [10] gtable_0.2.0 lattice_0.20-35 Matrix_1.2-6
## [13] DBI_0.6-1 SparseM_1.7 stringr_1.0.0
## [16] knitr_1.15 MatrixModels_0.4-1 grid_3.3.1
## [19] R6_2.2.0 qrng_0.0-3 magrittr_1.5
## [22] scales_0.4.1 codetools_0.2-14 assertthat_0.2.0
## [25] cubature_1.1-2 colorspace_1.2-6 labeling_0.3
## [28] quantreg_5.33 stringi_1.1.1 lazyeval_0.2.0
## [31] munsell_0.4.3