Skip to content

Instantly share code, notes, and snippets.

@fawda123
Last active June 23, 2018 18:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fawda123/a1358a1ac60e5b8a2b9e0d1a3aa4fda2 to your computer and use it in GitHub Desktop.
Save fawda123/a1358a1ac60e5b8a2b9e0d1a3aa4fda2 to your computer and use it in GitHub Desktop.
limits of imputePSF, compared with na.mean on simulated time series
library(tidyverse)
library(gridExtra)
library(grid)
library(imputeTestbench)
library(PSF)
library(imputePSF)
library(imputeTS)
library(scales)
# total obs in each simulated time series
n <- length(nottem)
# average of nottem for centering simulated data
avenot <- mean(nottem)
# create simulated data
# wvs is periodic only, nrms is random only, sim is combo of the two
simdat <- crossing(
mts = seq(0, 1, length = 25),
sds = seq(0, 8, length = 25)
) %>%
group_by(mts, sds) %>%
nest %>%
mutate(
wvs = map(mts, function(mts) ((as.numeric(nottem) - avenot) * mts) + avenot),
nrm = map(sds, function(sds) rnorm(n = n, sd = sds))
) %>%
select(-data) %>%
mutate(
sim = pmap(list(wvs, nrm), function(wvs, nrm) wvs + nrm)
)
# run imputePSF with imputeTestbench, compare with replacement by means
simres <- simdat %>%
mutate(
errs = map(sim, impute_errors,
methods = c("impute_PSF", "na.mean"), # "na.approx", "na.locf", "na.mean"),
smps = 'mar', missPercentFrom = 30, missPercentTo = 30, blck = 100,
errorParameter = 'mape')
)
# optionally save and reload, simres takes a while
save(simres, file = 'simres.RData', compress = 'xz')
# load(file = 'simres.RData')
##
# plot output
# example plot for simulated observations, data prep
toplo <- simdat %>%
filter(mts %in% c(0, 0.5, 1) & sds %in% c(0, 4, 8)) %>%
mutate(ind = list(1:n)) %>%
unnest
# plot
p1 <- ggplot(toplo, aes(x = ind, y = sim)) +
geom_line(size = 0.25) +
geom_point(size = 0.5, alpha = 0.8, colour = 'black') +
facet_grid(mts ~ sds) +
theme_bw(base_family = 'serif', base_size = 14) +
theme(
strip.background = element_blank()
) +
scale_y_continuous('Simulated values') +
scale_x_continuous('Index')
# plot of imputation differences, data prep
pls <- simres %>%
mutate(
toplo = map(errs, as.data.frame)
) %>%
select(mts, sds, toplo) %>%
unnest %>%
select(-Parameter) %>%
mutate(
errdf = na.mean - impute_PSF,
dir = ifelse(errdf > 0, '+', '')
)
# ggplot base theme
pbase <- theme_bw(base_family = 'serif') +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
legend.position = 'top',
legend.direction = 'horizontal',
plot.margin = unit(c(1,1,0,0), "lines"),
strip.background = element_blank(),
strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5),
panel.background = element_rect(fill = 'black')
)
# plot
p2 <- ggplot(pls) +
geom_tile(aes(y = mts, x = sds, fill = errdf), colour = 'black') +
geom_text(aes(y = mts, x = sds, label = dir)) +
pbase +
scale_y_reverse('Increasing signal', expand = c(0, 0)) +
scale_x_continuous('Increasing noise', expand = c(0, 0)) +
scale_fill_gradient2('Error difference\n(mean replacement - imputePSF)', low = muted("red"), mid = "white", high = muted("green"), midpoint = 0) +
guides(fill = guide_colourbar(barheight = 0.5, barwidth = 8))
gparg <- gpar(fontfamily = 'serif', cex = 1.2)
# first plot save as jpeg
jpeg('simex.jpg', height = 5, width = 6.5, units = 'in', res = 500, quality = 100)
grid.arrange(
arrangeGrob(
p1,
top = textGrob('Increasing noise', gp = gparg),
right = textGrob('Increasing periodicity', rot = 270, gp = gparg))
)
dev.off()
# second plot save as jpeg
jpeg('simres.jpg', height = 4, 4.5, units = 'in', res = 500, quality = 100)
p2
dev.off()
# R version 3.4.4 (2018-03-15)
# Platform: x86_64-w64-mingw32/x64 (64-bit)
# Running under: Windows >= 8 x64 (build 9200)
#
# Matrix products: default
#
# locale:
# [1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252
# [3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
# [5] LC_TIME=English_United States.1252
#
# attached base packages:
# [1] grid stats graphics grDevices utils datasets methods base
#
# other attached packages:
# [1] bindrcpp_0.2 scales_0.5.0.9000 imputeTS_2.5 imputePSF_0.1.0 PSF_0.4
# [6] imputeTestbench_3.0.1 gridExtra_2.2.1 dplyr_0.7.3 purrr_0.2.3 readr_1.1.1
# [11] tidyr_0.7.1 tibble_1.4.2 ggplot2_2.2.1.9000 tidyverse_1.1.1
#
# loaded via a namespace (and not attached):
# [1] Rcpp_0.12.16 lubridate_1.6.0 lattice_0.20-35 zoo_1.8-0 digest_0.6.15
# [6] assertthat_0.2.0 lmtest_0.9-35 psych_1.7.5 R6_2.2.2 cellranger_1.1.0
# [11] plyr_1.8.4 httr_1.2.1 pillar_1.2.2 rlang_0.2.0.9001 lazyeval_0.2.1
# [16] readxl_1.0.0 data.table_1.10.4-3 fracdiff_1.4-2 TTR_0.23-1 labeling_0.3
# [21] stringr_1.2.0 foreign_0.8-69 munsell_0.4.3 broom_0.4.2 compiler_3.4.4
# [26] modelr_0.1.0 pkgconfig_2.0.1 mnormt_1.5-5 forecast_8.1 tidyselect_0.2.0
# [31] nnet_7.3-12 quadprog_1.5-5 withr_2.1.2 nlme_3.1-131.1 jsonlite_1.5
# [36] gtable_0.2.0 magrittr_1.5 quantmod_0.4-10 stringi_1.1.5 reshape2_1.4.3
# [41] tseries_0.10-42 timeDate_3012.100 xml2_1.1.1 xts_0.10-0 tools_3.4.4
# [46] forcats_0.2.0 glue_1.1.1 hms_0.3 parallel_3.4.4 yaml_2.1.14
# [51] colorspace_1.3-2 cluster_2.0.6 stinepack_1.3 rvest_0.3.2 bindr_0.1
# [56] haven_1.0.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment