Skip to content

Instantly share code, notes, and snippets.

@PallCreaker
Created October 24, 2016 02:42
Show Gist options
  • Save PallCreaker/3952a58d8b7cafb5ac7c0994ac8a11b8 to your computer and use it in GitHub Desktop.
Save PallCreaker/3952a58d8b7cafb5ac7c0994ac8a11b8 to your computer and use it in GitHub Desktop.
R(統計) 変数毎に基本統計量(summary)を算出する。int,num,factorに対応。 ref: http://qiita.com/PallCreaker/items/17008c97ad4536f4d512
> output <- mySummary(iris)
> output
colnames class inform sum sd mean Min Median Max NA.s
2 Sepal.Length numeric <NA> 876.5 0.8281 5.8433 4.3 5.8 7.9 0
3 Sepal.Width numeric <NA> 458.6 0.4359 3.0573 2 3 4.4 0
4 Petal.Length numeric <NA> 563.7 1.7653 3.758 1 4.35 6.9 0
5 Petal.Width numeric <NA> 179.9 0.7622 1.1993 0.1 1.3 2.5 0
6 Species factor setosa:50, versicolor:50, virginica:50 <NA> <NA> <NA> <NA> <NA> <NA> 0
#' Compute basic summary statistics for every column of a data frame.
#'
#' Numeric (integer or double) columns get sum, standard deviation, mean,
#' minimum, median and maximum, all computed with `na.rm = TRUE`; `sd` and
#' `mean` are rounded to 4 decimal places. Factor and logical columns get an
#' `inform` string built from `summary()` as comma-separated `name:value`
#' pairs. Columns of any other type only get their name, class and NA count.
#'
#' @param df A data frame.
#' @return `NA` if `df` is not a data frame. Otherwise a data frame with one
#'   row per column of `df` and the columns `colnames`, `class`, `inform`,
#'   `sum`, `sd`, `mean`, `Min`, `Median`, `Max` and `NA.s`. Every column is
#'   of type character; statistics that do not apply are `NA`. A data frame
#'   without columns yields a zero-row result.
mySummary <- function(df) {
  # Only data frames are supported; other inputs yield NA (kept so existing
  # callers that test for NA keep working).
  if (!is.data.frame(df)) {
    return(NA)
  }

  out.names <- c(
    "colnames", "class", "inform", "sum", "sd", "mean",
    "Min", "Median", "Max", "NA.s"
  )

  # Build one output row (a character vector in `out.names` order) for a
  # single column. All intermediate values are local to this helper, so
  # nothing can leak in from the calling or global environment.
  summarise.column <- function(x, name) {
    cal.summary <- NA_character_
    cal.sum <- cal.sd <- cal.mean <- NA_real_
    cal.min <- cal.median <- cal.max <- NA_real_

    if (is.numeric(x)) {
      # as.numeric() avoids integer overflow when summing integer columns.
      cal.sum <- sum(as.numeric(x), na.rm = TRUE)
      cal.mean <- mean(x, na.rm = TRUE)
      cal.sd <- sd(x, na.rm = TRUE)
      cal.min <- min(x, na.rm = TRUE)
      cal.median <- median(x, na.rm = TRUE)
      cal.max <- max(x, na.rm = TRUE)
    } else if (is.factor(x) || is.logical(x)) {
      tmp <- summary(x)
      cal.summary <- paste(paste(names(tmp), tmp, sep = ":"), collapse = ", ")
    }

    # c() coerces every field to character, as the result has always done.
    c(
      name,
      # class() may return several strings (e.g. an ordered factor); collapse
      # them so the row always has exactly one field per output column.
      paste(class(x), collapse = "/"),
      cal.summary,
      cal.sum,
      round(cal.sd, 4),
      round(cal.mean, 4),
      cal.min,
      cal.median,
      cal.max,
      sum(is.na(x))
    )
  }

  # seq_along() is safe for zero-column input; [[ always extracts the column
  # itself, also for data-frame subclasses whose `[` never drops.
  rows <- lapply(
    seq_along(df),
    function(i) summarise.column(df[[i]], names(df)[i])
  )

  cells <- matrix(
    as.character(unlist(rows)),
    nrow = length(rows),
    ncol = length(out.names),
    byrow = TRUE,
    dimnames = list(NULL, out.names)
  )
  new.df <- as.data.frame(cells, stringsAsFactors = FALSE)

  # Row names start at 2 to stay identical to the historical output, which
  # was produced by dropping a placeholder first row.
  if (nrow(new.df) > 0) {
    rownames(new.df) <- seq_len(nrow(new.df)) + 1L
  }
  new.df
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment