Skip to content

Instantly share code, notes, and snippets.

@tobigithub
Created November 2, 2015 00:42
Show Gist options
  • Save tobigithub/b84d4480cb4738036d32 to your computer and use it in GitHub Desktop.
Save tobigithub/b84d4480cb4738036d32 to your computer and use it in GitHub Desktop.
tabstat in R (summary statistics latex)
#' Advanced Summary Statistics Table with Description Option
#'
#' Builds a summary table much like Stata's `tabstat`: one row per selected
#' variable of the data frame and one column per requested statistic.
#' Works for any number of selected variables, including a single one.
#'
#' Columns always appear in a fixed order (count, mean, sum, range, sd, var,
#' cv, semean, skewness, kurtosis, min, q1, q5, q10, q25, median, q75, q90,
#' q95, q99, max, iqr), regardless of the order given in `stats`.
#'
#' @param data.frame Data frame object.
#' @param variables Index or vector of indices / variable names to include.
#' @param table Type of table to return. Options are "simple"/"table" or
#'   "latex"/"xtable".
#' @param caption Caption passed to `xtable` if "latex"/"xtable".
#' @param digits Digits passed to `xtable` if "latex"/"xtable".
#' @param stats Statistics to include: 'count', 'mean', 'sum', 'max', 'min',
#'   'range' (max - min), 'sd', 'var', 'cv' (sd / mean), 'semean'
#'   (sd / sqrt(n)), 'skewness', 'kurtosis', 'q1', 'q5', 'q10', 'q25',
#'   'median' (alias 'q50'), 'q75', 'q90', 'q95', 'q99', 'iqr' (q75 - q25).
#'   Unrecognised names are ignored with a warning. 'skewness' and
#'   'kurtosis' require the fBasics package.
#' @param description Further description for each variable, added as the
#'   first column of the table. `NULL` (the default) adds no such column.
#' @param ... Further arguments passed on to `xtable::xtable()`.
#' @return For "simple"/"table": a numeric matrix (variables in rows,
#'   statistics in columns), or a data frame with a leading `description`
#'   column when `description` is supplied. For "latex"/"xtable": the same
#'   table wrapped in an `xtable` object.
#' @keywords Summary
#' @keywords Statistics
#' @keywords Table
#' @export
#' @examples
#' df <- data.frame(matrix(rnorm(30), ncol = 5))
#' tableStats2(df, stats = c("count", "mean", "sd", "q5", "median"))
#' tableStats2(df, table = "xtable", description = rep("hi", 5), digits = 2)
tableStats2 <- function(data.frame, variables = names(data.frame),
                        table = "simple", caption = "Summary Statistics",
                        digits = 4, stats = c("mean", "sd", "median"),
                        description = c(), ...) {
  # Reject an unknown output type up front instead of after all the work.
  if (!is.character(table) || length(table) != 1L ||
      !table %in% c("simple", "table", "latex", "xtable")) {
    stop("Please specify correct table type")
  }
  as_latex <- table %in% c("latex", "xtable")

  # One scalar-valued function per statistic; the list order is the column
  # order of the result.
  stat_funs <- list(
    count    = function(x) length(x),
    mean     = function(x) mean(x),
    sum      = function(x) sum(x),
    range    = function(x) max(x) - min(x), # single value, as in tabstat
    sd       = function(x) sd(x),
    var      = function(x) var(x),
    cv       = function(x) sd(x) / mean(x), # coefficient of variation
    semean   = function(x) sd(x) / sqrt(length(x)), # standard error of mean
    skewness = function(x) fBasics::skewness(x),
    kurtosis = function(x) fBasics::kurtosis(x),
    min      = function(x) min(x),
    q1       = function(x) quantile(x, 0.01),
    q5       = function(x) quantile(x, 0.05),
    q10      = function(x) quantile(x, 0.10),
    q25      = function(x) quantile(x, 0.25),
    median   = function(x) median(x),
    q75      = function(x) quantile(x, 0.75),
    q90      = function(x) quantile(x, 0.90),
    q95      = function(x) quantile(x, 0.95),
    q99      = function(x) quantile(x, 0.99),
    max      = function(x) max(x),
    iqr      = function(x) quantile(x, 0.75) - quantile(x, 0.25)
  )

  # 'q50' is an alias for the median and is reported under that label.
  requested <- stats
  requested[requested == "q50"] <- "median"
  unknown <- setdiff(requested, names(stat_funs))
  if (length(unknown) > 0L) {
    warning("Ignoring unknown statistics: ",
            paste(unknown, collapse = ", "), call. = FALSE)
  }
  chosen <- names(stat_funs)[names(stat_funs) %in% requested]

  # fBasics is only needed when skewness or kurtosis is actually requested.
  if (any(c("skewness", "kurtosis") %in% chosen) &&
      !requireNamespace("fBasics", quietly = TRUE)) {
    stop("Package 'fBasics' is required for 'skewness' and 'kurtosis'",
         call. = FALSE)
  }

  # drop = FALSE keeps a data frame even when a single variable is selected.
  selected <- data.frame[, variables, drop = FALSE]

  summary.stats <- matrix(
    NA_real_,
    nrow = ncol(selected), ncol = length(chosen),
    dimnames = list(names(selected), chosen)
  )
  for (s in chosen) {
    # as.numeric() strips names ("5%") and attributes (fBasics "method")
    # so they cannot leak into the table's dimnames.
    summary.stats[, s] <- vapply(
      selected,
      function(x) as.numeric(stat_funs[[s]](x)),
      numeric(1)
    )
  }

  if (!is.null(description)) {
    # The numeric matrix becomes data frame columns; no re-coercion needed.
    summary.stats <- cbind(data.frame(description), summary.stats)
  }

  if (as_latex) {
    if (!requireNamespace("xtable", quietly = TRUE)) {
      stop("Package 'xtable' is required for table = \"", table, "\"",
           call. = FALSE)
    }
    return(xtable::xtable(summary.stats, caption = caption,
                          digits = digits, ...))
  }
  summary.stats
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment