Skip to content

Instantly share code, notes, and snippets.

@mbjoseph
Created November 22, 2016 20:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mbjoseph/37f1ac71f484f4ea77ba566b56ae3398 to your computer and use it in GitHub Desktop.
Save mbjoseph/37f1ac71f484f4ea77ba566b56ae3398 to your computer and use it in GitHub Desktop.
Exploring how sample size affects the estimates of Weibull shape and scale parameters
# Script to evaluate small sample behavior of Weibull parameter MLEs --------
library(fitdistrplus)
library(dplyr)
library(tidyr)
library(gridExtra)
library(parallel)
library(ggplot2)
library(viridis)
# Simulate Weibull samples and MLEs across range of sample size -----------
check_weib <- function(n) {
require(fitdistrplus)
x <- rweibull(n, 1, 1)
fitdistr(x, densfun = "weibull")$estimate
}
n_vec <- rep(4:60, each = 5000)
cl <- makeCluster(getOption("cl.cores", detectCores()))
sims <- parSapply(cl, n_vec, check_weib)
stopCluster(cl)
# Clean up simulation results ---------------------------------------------
# bundle results into data frame
df <- t(sims) %>%
data.frame()
df$n <- n_vec
# Visualize results -------------------------------------------------------
# evaluate estimates wrt sample size
ggplot(df, aes(x = n, y = shape)) +
geom_point(alpha = .1) +
geom_hline(yintercept = 1, color = "red") +
geom_smooth()
ggplot(df, aes(x = n, y = scale)) +
geom_point(alpha = .1) +
geom_hline(yintercept = 1, color = "red") +
geom_smooth()
# Plot bivariate ellipses, correlations, and bias -------------------------
mars <- unit(c(1, 1, 1.2, 1),"cm")
p1 <- df %>%
ggplot(aes(x = shape, y = scale, fill = n)) +
stat_ellipse(geom = "polygon", aes(group = n)) +
scale_fill_viridis("Sample size", option = "B") +
theme_minimal() +
xlab("Shape") +
ylab("Scale") +
annotate("text", x = 1, y = 1, label = "Truth", size = 4) +
theme(plot.margin = mars) +
ggtitle("90% Normal ellipses for Weibull MLE sampling distributions")
p2 <- df %>%
group_by(n) %>%
summarize(Shape = mean(shape - 1),
Scale = mean(scale - 1)) %>%
gather(key = Parameter, value = bias, -n) %>%
ggplot(aes(x = n, y = bias, color = Parameter)) +
geom_line() +
theme_minimal() +
scale_colour_brewer(palette = "Set1") +
xlab("Sample size") +
ylab("Bias") +
theme(plot.margin = mars)
p3 <- df %>%
group_by(n) %>%
summarize(cor = cor(shape, scale)) %>%
ggplot(aes(x = n, y = cor)) +
geom_point() +
theme_minimal() +
xlab("Sample size") +
ylab("Correlation: shape and scale estimates") +
stat_smooth(method = "loess") +
theme(plot.margin = mars)
grid.arrange(p1, arrangeGrob(p3, p2, nrow = 1), nrow = 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment