Created
November 22, 2016 20:47
-
-
Save mbjoseph/37f1ac71f484f4ea77ba566b56ae3398 to your computer and use it in GitHub Desktop.
Exploring how sample size affects the estimates of Weibull shape and scale parameters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to evaluate small sample behavior of Weibull parameter MLEs -------- | |
library(fitdistrplus) | |
library(dplyr) | |
library(tidyr) | |
library(gridExtra) | |
library(parallel) | |
library(ggplot2) | |
library(viridis) | |
# Simulate Weibull samples and MLEs across range of sample size ----------- | |
check_weib <- function(n) { | |
require(fitdistrplus) | |
x <- rweibull(n, 1, 1) | |
fitdistr(x, densfun = "weibull")$estimate | |
} | |
n_vec <- rep(4:60, each = 5000) | |
cl <- makeCluster(getOption("cl.cores", detectCores())) | |
sims <- parSapply(cl, n_vec, check_weib) | |
stopCluster(cl) | |
# Clean up simulation results --------------------------------------------- | |
# bundle results into data frame | |
df <- t(sims) %>% | |
data.frame() | |
df$n <- n_vec | |
# Visualize results ------------------------------------------------------- | |
# evaluate estimates wrt sample size | |
ggplot(df, aes(x = n, y = shape)) + | |
geom_point(alpha = .1) + | |
geom_hline(yintercept = 1, color = "red") + | |
geom_smooth() | |
ggplot(df, aes(x = n, y = scale)) + | |
geom_point(alpha = .1) + | |
geom_hline(yintercept = 1, color = "red") + | |
geom_smooth() | |
# Plot bivariate ellipses, correlations, and bias ------------------------- | |
mars <- unit(c(1, 1, 1.2, 1),"cm") | |
p1 <- df %>% | |
ggplot(aes(x = shape, y = scale, fill = n)) + | |
stat_ellipse(geom = "polygon", aes(group = n)) + | |
scale_fill_viridis("Sample size", option = "B") + | |
theme_minimal() + | |
xlab("Shape") + | |
ylab("Scale") + | |
annotate("text", x = 1, y = 1, label = "Truth", size = 4) + | |
theme(plot.margin = mars) + | |
ggtitle("90% Normal ellipses for Weibull MLE sampling distributions") | |
p2 <- df %>% | |
group_by(n) %>% | |
summarize(Shape = mean(shape - 1), | |
Scale = mean(scale - 1)) %>% | |
gather(key = Parameter, value = bias, -n) %>% | |
ggplot(aes(x = n, y = bias, color = Parameter)) + | |
geom_line() + | |
theme_minimal() + | |
scale_colour_brewer(palette = "Set1") + | |
xlab("Sample size") + | |
ylab("Bias") + | |
theme(plot.margin = mars) | |
p3 <- df %>% | |
group_by(n) %>% | |
summarize(cor = cor(shape, scale)) %>% | |
ggplot(aes(x = n, y = cor)) + | |
geom_point() + | |
theme_minimal() + | |
xlab("Sample size") + | |
ylab("Correlation: shape and scale estimates") + | |
stat_smooth(method = "loess") + | |
theme(plot.margin = mars) | |
grid.arrange(p1, arrangeGrob(p3, p2, nrow = 1), nrow = 2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment