Skip to content

Instantly share code, notes, and snippets.

@artemklevtsov
Last active February 7, 2016 14:10
Show Gist options
  • Save artemklevtsov/f4d261b2f1d354261484 to your computer and use it in GitHub Desktop.
Save artemklevtsov/f4d261b2f1d354261484 to your computer and use it in GitHub Desktop.
desc_stats_by <- function(x, ...) {
UseMethod("desc_stats_by")
}
desc_stats_by.data.frame <- function(x, by, ...) {
by <- substitute(by)
if (!is.numeric(by))
by <- which(names(x) == as.character(by))
g <- x[, by]
x <- x[, -by]
res <- lapply(split.data.frame(x, g), desc_stats.data.frame, ...)
res <- do.call(rbind, res)
group <- rep(sort(unique(g)), each = length(x))
cn <- names(res)
res <- c(list(group = group), res)
class(res) <- "data.frame"
attr(res, "row.names") <- .set_row_names(length(group))
res
}
desc_stats <- function(x, ...) {
UseMethod("desc_stats")
}
desc_stats.default <- function(x, na.rm = TRUE, skew = FALSE, norm = FALSE) {
stopifnot(is.numeric(x))
if (na.rm) x <- x[!is.na(x)]
n <- length(x)
mean <- sum(x) / n
dev <- x - mean
sd <- sqrt(sum(dev^2L) / (n - 1L))
se <- sd / sqrt(n)
half <- (n + 1L) %/% 2L
if (n %% 2L == 1L)
median <- sort.int(x, partial = half)[half]
else
median <- sum(sort.int(x, partial = half + 0L:1L)[half + 0L:1L]) / 2L
min <- min(x)
max <- max(x)
range <- max - min
res <- c(n = n, mean = mean, se = se, sd = sd, median = median, min = min, max = max, range = range)
if (skew) {
skewness <- (sum(dev^3L) / n) / (sqrt(sum(dev^2L) / (n - 1L))^3L)
kurtosis <- (sum(dev^4L) / n) / (sqrt(sum(dev^2L) / (n - 1L))^4L) - 3L
res <- c(res, skewness = skewness, kurtosis = kurtosis)
}
if (norm) {
test <- shapiro.test(x)
res <- c(res, test$statistic, p.value = test$p.value)
}
return(res)
}
desc_stats.matrix <- function(x, ...) {
x <- as.data.frame(x)
desc_stats.data.frame(x, ...)
}
desc_stats.list <- function(x, ...) {
lns <- lengths(x)
if (max(lns) - min(lns) > 0)
stop("List elements must be the same lengths.")
x <- as.data.frame(x)
desc_stats.data.frame(x, ...)
}
desc_stats.data.frame <- function(x, ...) {
x <- x[vapply(x, is.numeric, logical(1L))]
res <- lapply(x, desc_stats.default, ...)
res <- do.call(rbind, res)
res <- as.data.frame(res)
cn <- names(res)
res <- c(list(vars = names(x)), res)
class(res) <- "data.frame"
attr(res, "row.names") <- .set_row_names(length(x))
res
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment