Skip to content

Instantly share code, notes, and snippets.

@arcaravaggi
Last active July 24, 2023 15:02
Show Gist options
  • Save arcaravaggi/299cb55475455a6036324b6562f79ea7 to your computer and use it in GitHub Desktop.
Save arcaravaggi/299cb55475455a6036324b6562f79ea7 to your computer and use it in GitHub Desktop.
Generate dispersion (including plot) and summary metrics for a given vector
# Plot histogram with density curve, and measures of dispersion from the mean
#
# dispersion x axis:
# V = Sample variance
# IQR = inter-quartile range (balanced)
# SD = standard deviation
# SE = Standard error
# CI = Confidence interval (see inputs)
#
# CV = coefficent of variation
#
# t = vector of numeric or interval data
# ci = z value for the desired confidence interval. Default is 1.96 (95%)
# label.y = Axis title for histogram x and dispersion y
# Example
# set.seed(42)
# a <- runif(100, min = 18, max = 27)
# c <- rnorm(100, mean = 22.5, sd = 2.6)
# b <- c(a,c)
# vecQ(b, 1.96, "Measurement (mm)")[[2]]
vecQ <- function(t, ci = 1.96, label.y){
require(grid)
require(ggplot2)
require(moments)
require(ggpubr)
df <- data.frame(cat = c("V", "IQR", "SD", "SE", "CI"), mean = NA, d.lower = NA, d.upper =NA)
df$cat <- factor(df$cat, levels = unique(df$cat))
m <- mean(t,na.rm = TRUE)
v <- var(t, na.rm = TRUE)
s <- sd(t, na.rm = TRUE)
n <- length(t)
se <- s/sqrt(n)
z <- ci
df$mean <- m
df$d.lower[1] <- m - v
df$d.upper[1] <- m + v
df$d.lower[2] <- quantile(t)[2]
df$d.upper[2] <- quantile(t)[4]
df$d.lower[3] <- m - s
df$d.upper[3] <- m + s
df$d.lower[4] <- m - se
df$d.upper[4] <- m + se
df$d.lower[5] <- m - (z*se)
df$d.upper[5] <- m + (z*se)
cv <- sd(t, na.rm=TRUE)/
mean(t, na.rm=TRUE)*100
cv <- round(cv, 2)
sk <- skewness(t)
sk <- round(sk, 3)
grob <- grobTree(textGrob(paste0("Skewness = ", sk), x=0.01, y=0.95, hjust=0,
gp=gpar(fontsize=12)))
kt <- kurtosis(t)
kt <- round(kt, 3)
grob2 <- grobTree(textGrob(paste0("Kurtosis = ", kt), x=0.01, y=0.85, hjust=0,
gp=gpar(fontsize=12)))
plot.a <- ggplot(df, aes(x = cat, y = mean)) +
geom_point() +
geom_errorbar(aes(ymin = d.lower, ymax = d.upper), width=.3,)+
annotate("text", x = 3, y = m+s+(se*2), label = paste0("CV = ", cv, size = 5)) +
theme_bw() +
theme(axis.text = element_text(size = 16),
axis.title = element_text(size=16,face="bold")) +
ylab(label.y) +
xlab("Measure of dispersion")
plot.b <- ggplot(as.data.frame(t), aes(t)) +
geom_histogram(aes(y = ..density..),
binwidth = 0.5,
fill = I("white"),
col = "black") +
geom_density(col = "#009E73",
lwd = 2) +
geom_vline(xintercept = median(t),
col = "#E69F00",
lty = 1,
lwd = 2) +
geom_vline(xintercept = mean(t),
col = "#0072B2",
lty = 2,
lwd = 2) +
annotation_custom(grob) +
annotation_custom(grob2) +
theme_bw() +
theme(axis.text = element_text(size = 16),
axis.title = element_text(size=16,face="bold")) +
xlab(label.y) +
ylab("Density")
plot <- ggarrange(plot.b, plot.a,
ncol = 2, nrow = 1)
getmode <- function(v) {
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
summary.list <- function(x)list(
N.minus.NA = length(x[!is.na(x)]),
NA.count = length(x[is.na(x)]),
Skewness = sk,
Kurtosis = kt,
Max.Min = range(x, na.rm=TRUE),
Range = max(x, na.rm=TRUE) - min(x, na.rm=TRUE),
Mode = getmode(x),
Median = median(x, na.rm=TRUE),
Quantile = quantile(x, na.rm=TRUE),
Variance = v,
Mean = m,
Std.Dev = s,
Coeff.Variation = cv,
Std.Error = se
)
s <- summary.list(t)
return(list(df, plot, s))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment